From d2c72d96dff5ba408bb107193b53e3134a516c4b Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 8 Oct 2024 16:02:48 -0400
Subject: [PATCH 1/9] drm/amdgpu: enable enforce_isolation sysfs node on VFs

It should be enabled on both bare metal and VFs.

Fixes: e189be9b2e38 ("drm/amdgpu: Add enforce_isolation sysfs attribute")
Signed-off-by: Alex Deucher
Cc: Srinivasan Shanmugam
Cc: Amber Lin
Reviewed-by: Srinivasan Shanmugam
(cherry picked from commit dc8847b054fd6679866ed4ee861e069e54c10799)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 83e54697f0ee..f1ffab5a1eae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1635,11 +1635,9 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (!amdgpu_sriov_vf(adev)) {
-		r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
-		if (r)
-			return r;
-	}
+	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+	if (r)
+		return r;
 
 	r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
 	if (r)
@@ -1650,8 +1648,7 @@ int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
 
 void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
 {
-	if (!amdgpu_sriov_vf(adev))
-		device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
 	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
 }

From c0ec082f10b7a1fd25e8c1e2a686440da913b7a3 Mon Sep 17 00:00:00 2001
From: Mohammed Anees
Date: Wed, 9 Oct 2024 17:58:31 +0530
Subject: [PATCH 2/9] drm/amdgpu: prevent BO_HANDLES error from being overwritten
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before this patch, if multiple BO_HANDLES chunks were submitted, the
error -EINVAL would be correctly set but could then be overwritten by
the return value from amdgpu_cs_p1_bo_handles(). Jump straight to the
error handling path when a second BO_HANDLES chunk is encountered, so
that the error code is preserved.

Fixes: fec5f8e8c6bc ("drm/amdgpu: disallow multiple BO_HANDLES chunks in one submit")
Signed-off-by: Mohammed Anees
Reviewed-by: Christian König
Signed-off-by: Pierre-Eric Pelloux-Prayer
Signed-off-by: Alex Deucher
(cherry picked from commit 40f2cd98828f454bdc5006ad3d94330a5ea164b7)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1e475eb01417..d891ab779ca7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -265,7 +265,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 
 		/* Only a single BO list is allowed to simplify handling. */
 		if (p->bo_list)
-			ret = -EINVAL;
+			goto free_partial_kdata;
 
 		ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
 		if (ret)

From 7760d7f93c764625fedca176891238675fd06d62 Mon Sep 17 00:00:00 2001
From: Michael Chen
Date: Tue, 8 Oct 2024 15:29:50 -0400
Subject: [PATCH 3/9] drm/amdgpu/mes: fix issue of writing to the same log buffer from 2 MES pipes

With unified MES enabled on GFX12, a separate event log buffer is needed
for each of the two MES pipes so that one pipe does not overwrite the
other's log data.
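
To picture the layout, each pipe now reports into its own slice of the
enlarged event log allocation. This is only an illustrative sketch; the
helper name below is hypothetical and not part of the patch:

	/* Illustrative only: pipe N logs into slice N of the event log BO. */
	static u64 mes_pipe_event_log_addr(struct amdgpu_mes *mes, int pipe)
	{
		return mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE;
	}

With unified MES the log buffer object is therefore sized as
AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE instead of a single
AMDGPU_MES_LOG_BUFFER_SIZE, as the diff below shows.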

Signed-off-by: Michael Chen
Reviewed-by: Jack Xiao
Signed-off-by: Alex Deucher
(cherry picked from commit 144df260f3daab42c4611021f929b3342de516e5)
Cc: stable@vger.kernel.org # 6.11.x
---
 drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 8d27421689c9..a37a6801c9ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -621,7 +621,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
 
 	if (amdgpu_mes_log_enable) {
 		mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
-		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+		mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE;
 	}
 
 	return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
@@ -1336,7 +1336,7 @@ static int mes_v12_0_sw_init(void *handle)
 	adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
 	adev->mes.enable_legacy_queue_map = true;
 
-	adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;
+	adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE;
 
 	r = amdgpu_mes_init(adev);
 	if (r)

From e7457532cb7167516263150ceae86f36d6ef9683 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Tue, 8 Oct 2024 19:01:48 +0530
Subject: [PATCH 4/9] drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch addresses a double unlock issue in the amdgpu_mes_add_ring
function. The mutex was being unlocked twice under certain error
conditions, which could lead to undefined behavior.

The fix ensures that the mutex is unlocked only once before jumping to
the clean_up_memory label. The unlock operation is moved to just before
the goto statement within the conditional block that checks the return
value of amdgpu_ring_init. This prevents the second unlock attempt
after the clean_up_memory label, which is no longer necessary as the
mutex is already unlocked at this point in the code flow.

This change resolves the potential double unlock and maintains the
correct mutex handling throughout the function.

Commit d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue
submission") leads to the following Smatch static checker warning:

	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring()
	warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213)

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
  1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
  1144                         int queue_type, int idx,
  1145                         struct amdgpu_mes_ctx_data *ctx_data,
  1146                         struct amdgpu_ring **out)
  1147 {
  1148         struct amdgpu_ring *ring;
  1149         struct amdgpu_mes_gang *gang;
  1150         struct amdgpu_mes_queue_properties qprops = {0};
  1151         int r, queue_id, pasid;
  1152 
  1153         /*
  1154          * Avoid taking any other locks under MES lock to avoid circular
  1155          * lock dependencies.
  1156          */
  1157         amdgpu_mes_lock(&adev->mes);
  1158         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
  1159         if (!gang) {
  1160                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
  1161                 amdgpu_mes_unlock(&adev->mes);
  1162                 return -EINVAL;
  1163         }
  1164         pasid = gang->process->pasid;
  1165 
  1166         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
  1167         if (!ring) {
  1168                 amdgpu_mes_unlock(&adev->mes);
  1169                 return -ENOMEM;
  1170         }
  1171 
  1172         ring->ring_obj = NULL;
  1173         ring->use_doorbell = true;
  1174         ring->is_mes_queue = true;
  1175         ring->mes_ctx = ctx_data;
  1176         ring->idx = idx;
  1177         ring->no_scheduler = true;
  1178 
  1179         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
  1180                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
  1181                                       compute[ring->idx].mec_hpd);
  1182                 ring->eop_gpu_addr =
  1183                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
  1184         }
  1185 
  1186         switch (queue_type) {
  1187         case AMDGPU_RING_TYPE_GFX:
  1188                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
  1189                 ring->me = adev->gfx.gfx_ring[0].me;
  1190                 ring->pipe = adev->gfx.gfx_ring[0].pipe;
  1191                 break;
  1192         case AMDGPU_RING_TYPE_COMPUTE:
  1193                 ring->funcs = adev->gfx.compute_ring[0].funcs;
  1194                 ring->me = adev->gfx.compute_ring[0].me;
  1195                 ring->pipe = adev->gfx.compute_ring[0].pipe;
  1196                 break;
  1197         case AMDGPU_RING_TYPE_SDMA:
  1198                 ring->funcs = adev->sdma.instance[0].ring.funcs;
  1199                 break;
  1200         default:
  1201                 BUG();
  1202         }
  1203 
  1204         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
  1205                              AMDGPU_RING_PRIO_DEFAULT, NULL);
  1206         if (r)
  1207                 goto clean_up_memory;
  1208 
  1209         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
  1210 
  1211         dma_fence_wait(gang->process->vm->last_update, false);
  1212         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
  1213         amdgpu_mes_unlock(&adev->mes);
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1214 
  1215         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
  1216         if (r)
  1217                 goto clean_up_ring;
                       ^^^^^^^^^^^^^^^^^^
  1218 
  1219         ring->hw_queue_id = queue_id;
  1220         ring->doorbell_index = qprops.doorbell_off;
  1221 
  1222         if (queue_type == AMDGPU_RING_TYPE_GFX)
  1223                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
  1224         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
  1225                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
  1226                         queue_id);
  1227         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
  1228                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
  1229                         queue_id);
  1230         else
  1231                 BUG();
  1232 
  1233         *out = ring;
  1234         return 0;
  1235 
  1236 clean_up_ring:
  1237         amdgpu_ring_fini(ring);
  1238 clean_up_memory:
  1239         kfree(ring);
--> 1240         amdgpu_mes_unlock(&adev->mes);
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1241         return r;
  1242 }

Fixes: d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission")
Cc: Christian König
Cc: Alex Deucher
Cc: Hawking Zhang
Suggested-by: Jack Xiao
Reported-by: Dan Carpenter
Signed-off-by: Srinivasan Shanmugam
Reviewed-by: Jack Xiao
Signed-off-by: Alex Deucher
(cherry picked from commit bfaf1883605fd0c0dbabacd67ed49708470d5ea4)
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 10b61ff63802..7d4b540340e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1203,8 +1203,10 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
 
 	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
-	if (r)
+	if (r) {
+		amdgpu_mes_unlock(&adev->mes);
 		goto clean_up_memory;
+	}
 
 	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
 
@@ -1237,7 +1239,6 @@ clean_up_ring:
 	amdgpu_ring_fini(ring);
 clean_up_memory:
 	kfree(ring);
-	amdgpu_mes_unlock(&adev->mes);
 	return r;
 }

From 68d26c10ef503175df3142db6fcd75dd94860592 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Fri, 4 Oct 2024 16:28:07 -0400
Subject: [PATCH 5/9] drm/amdkfd: Accounting pdd vram_usage for svm

Process device data pdd->vram_usage is read by rocm-smi via sysfs, but
it is currently missing the svm_bo usage accounting, so the per-process
VRAM usage reported by "rocm-smi --showpids" is incorrect.

Add pdd->vram_usage accounting when svm_bo is allocated and released,
and change it to atomic64_t type because it is now updated outside the
process mutex.

Signed-off-by: Philip Yang
Reviewed-by: Felix Kuehling
Signed-off-by: Alex Deucher
(cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04)
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 26 ++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9044bdb38cf4..3e6b4736a7fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}
 
 	mutex_unlock(&p->mutex);
@@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 	kfd_process_device_remove_obj_handle(
 		pdd, GET_IDR_HANDLE(args->handle));
 
-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);
 
 err_unlock:
 err_pdd:
@@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d6530febabad..26e48fdc8728 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -775,7 +775,7 @@ struct kfd_process_device {
 	enum kfd_pdd_bound bound;
 
 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d665ecdcd12f..d4aa843aacfd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_sdma);
@@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
-	pdd->vram_usage = 0;
+	atomic64_set(&pdd->vram_usage, 0);
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 04e746923697..1893c27746a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -405,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * The forked child process takes svm_bo device pages ref, svm_bo could be
+		 * released after parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -532,6 +553,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 	       bool clear)
 {
+	struct kfd_process_device *pdd;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
@@ -623,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 	list_add(&prange->svm_bo_list, &svm_bo->range_list);
 	spin_unlock(&svm_bo->list_lock);
 
+	pdd = svm_range_get_pdd_by_node(prange, node);
+	if (pdd)
+		atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
 	return 0;
 
 reserve_bo_failed:

From 7a1613e47e65ba6967085ad99dee95420346a0ce Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 3 Oct 2024 10:09:50 -0400
Subject: [PATCH 6/9] drm/amdgpu/smu13: always apply the powersave optimization

It can avoid margin issues in some very demanding applications.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3618
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3131
Fixes: c50fe289ed72 ("drm/amdgpu/swsmu: always force a state reprogram on init")
Reviewed-by: Kenneth Feng
Signed-off-by: Alex Deucher
(cherry picked from commit 62f38b4ccaa6aa063ca781d80b10aacd39dc5c76)
Cc: stable@vger.kernel.org
---
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 1d024b122b0c..cb923e33fd6f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2555,18 +2555,16 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 	workload_mask = 1 << workload_type;
 
 	/* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
-	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE) {
-		if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
-		     ((smu->adev->pm.fw_version == 0x004e6601) ||
-		      (smu->adev->pm.fw_version >= 0x004e7300))) ||
-		    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
-		     smu->adev->pm.fw_version >= 0x00504500)) {
-			workload_type = smu_cmn_to_asic_specific_index(smu,
-								       CMN2ASIC_MAPPING_WORKLOAD,
-								       PP_SMC_POWER_PROFILE_POWERSAVING);
-			if (workload_type >= 0)
-				workload_mask |= 1 << workload_type;
-		}
+	if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+	     ((smu->adev->pm.fw_version == 0x004e6601) ||
+	      (smu->adev->pm.fw_version >= 0x004e7300))) ||
+	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+	     smu->adev->pm.fw_version >= 0x00504500)) {
+		workload_type = smu_cmn_to_asic_specific_index(smu,
+							       CMN2ASIC_MAPPING_WORKLOAD,
+							       PP_SMC_POWER_PROFILE_POWERSAVING);
+		if (workload_type >= 0)
+			workload_mask |= 1 << workload_type;
 	}
 
 	ret = smu_cmn_send_smc_msg_with_param(smu,

From 28127dba64d8ae1a0b737b973d6d029908599611 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?=
Date: Mon, 14 Oct 2024 19:09:36 +0300
Subject: [PATCH 7/9] drm/radeon: Fix encoder->possible_clones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include the encoder itself in its possible_clones bitmask. In the past
nothing validated that drivers were populating possible_clones
correctly, but that changed in commit 74d2aacbe840 ("drm: Validate
encoder->possible_clones"). Looks like radeon never got the memo and
is still not following the rules 100% correctly.

This results in some warnings during driver initialization:

Bogus possible_clones: [ENCODER:46:TV-46] possible_clones=0x4 (full encoder mask=0x7)
WARNING: CPU: 0 PID: 170 at drivers/gpu/drm/drm_mode_config.c:615 drm_mode_config_validate+0x113/0x39c
...

Cc: Alex Deucher
Cc: amd-gfx@lists.freedesktop.org
Fixes: 74d2aacbe840 ("drm: Validate encoder->possible_clones")
Reported-by: Erhard Furtner
Closes: https://lore.kernel.org/dri-devel/20241009000321.418e4294@yea/
Tested-by: Erhard Furtner
Signed-off-by: Ville Syrjälä
Signed-off-by: Alex Deucher
(cherry picked from commit 3b6e7d40649c0d75572039aff9d0911864c689db)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_encoders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 0f723292409e..fafed331e0a0 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -43,7 +43,7 @@ static uint32_t radeon_encoder_clones(struct drm_encoder *encoder)
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_encoder *clone_encoder;
-	uint32_t index_mask = 0;
+	uint32_t index_mask = drm_encoder_mask(encoder);
 	int count;
 
 	/* DIG routing gets problematic */

From cb07c8338fc2b9d5f949a19d4a07ee4d5ecf8793 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 2 Oct 2024 10:22:30 -0400
Subject: [PATCH 8/9] drm/amdgpu/swsmu: Only force workload setup on init

We need to set the workload type at init time so that we can apply the
navi3x margin optimization.
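
A condensed sketch of the resulting logic in
smu_adjust_power_state_dynamic() (illustrative only; error handling and
unrelated steps omitted):

	/* The performance level is reprogrammed only when it actually changes... */
	if (smu_dpm_ctx->dpm_level != level)
		ret = smu_asic_set_performance_level(smu, level);

	/* ...while the workload profile is still forced once at driver init. */
	if (init || smu->power_profile_mode != workload[0])
		smu_bump_power_profile_mode(smu, workload, 0);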

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3618
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3131
Fixes: c50fe289ed72 ("drm/amdgpu/swsmu: always force a state reprogram on init")
Reviewed-by: Kenneth Feng
Signed-off-by: Alex Deucher
(cherry picked from commit 580ad7cbd4b7be8d2cb5ab5c1fca6bb76045eb0e)
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index bb3bc68dfc39..0cf598983552 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2226,7 +2226,7 @@ static int smu_bump_power_profile_mode(struct smu_context *smu,
 static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 					  enum amd_dpm_forced_level level,
 					  bool skip_display_settings,
-					  bool force_update)
+					  bool init)
 {
 	int ret = 0;
 	int index = 0;
@@ -2255,7 +2255,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		}
 	}
 
-	if (force_update || smu_dpm_ctx->dpm_level != level) {
+	if (smu_dpm_ctx->dpm_level != level) {
 		ret = smu_asic_set_performance_level(smu, level);
 		if (ret) {
 			dev_err(smu->adev->dev, "Failed to set performance level!");
@@ -2272,7 +2272,7 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
 		workload[0] = smu->workload_setting[index];
 
-		if (force_update || smu->power_profile_mode != workload[0])
+		if (init || smu->power_profile_mode != workload[0])
 			smu_bump_power_profile_mode(smu, workload, 0);
 	}

From ec1aab7816b06c32f42935e34ce3a3040c778afb Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 3 Oct 2024 09:57:38 -0400
Subject: [PATCH 9/9] drm/amdgpu/swsmu: default to fullscreen 3D profile for dGPUs

This uses more aggressive heuristics than the bootup default profile.
On Windows, the OS has a special fullscreen 3D mode where this is used.
Since we don't have an equivalent on Linux, default to this profile for
dGPUs.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3618
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1500
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3131
Reviewed-by: Kenneth Feng
Signed-off-by: Alex Deucher
(cherry picked from commit 336568de918e08c825b3b1cbe2ec809f2fc26d94)
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 0cf598983552..9ad9cf7a9c98 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1264,7 +1264,11 @@ static int smu_sw_init(void *handle)
 	smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4;
 	smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
 	smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
-	smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+
+	if (smu->is_apu)
+		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+	else
+		smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
 
 	smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
 	smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;