From 70a6587dca37a3cf12ab323f7fd73266abfc274f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 9 Sep 2024 17:46:50 +0300 Subject: [PATCH 01/41] drm/dp_mst: Fix DSC decompression detection in Synaptics branch devices Atm it's assumed that all Synaptics MST branch devices support DSC, which is not exposed via a DP-to-DP peer device, rather a control flag in the branch device's UFP DPCD applying to all the streams going through it. This isn't true for all adapters with this branch device though (for instance the Cakitte USBC->2xHDMI adapter reported in the Closes link below doesn't support DSC). Fix the above by advertising the DSC decompression support only if the capability flag for this in the UFP DPCD is actually set. Cc: Lyude Paul Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12047 Signed-off-by: Imre Deak Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20240909144650.2931258-1-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index fc2ceae61db2..ac5f1f34e51a 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -6057,6 +6057,7 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) struct drm_dp_aux *immediate_upstream_aux; struct drm_dp_mst_port *fec_port; struct drm_dp_desc desc = {}; + u8 upstream_dsc; u8 endpoint_fec; u8 endpoint_dsc; @@ -6083,8 +6084,6 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) /* DP-to-DP peer device */ if (drm_dp_mst_is_virtual_dpcd(immediate_upstream_port)) { - u8 upstream_dsc; - if (drm_dp_dpcd_read(&port->aux, DP_DSC_SUPPORT, &endpoint_dsc, 1) != 1) return NULL; @@ -6130,6 +6129,13 @@ struct drm_dp_aux *drm_dp_mst_dsc_aux_for_port(struct drm_dp_mst_port *port) if (drm_dp_has_quirk(&desc, DP_DPCD_QUIRK_DSC_WITHOUT_VIRTUAL_DPCD)) { u8 dpcd_ext[DP_RECEIVER_CAP_SIZE]; + if (drm_dp_dpcd_read(immediate_upstream_aux, + DP_DSC_SUPPORT, &upstream_dsc, 1) != 1) + return NULL; + + if (!(upstream_dsc & DP_DSC_DECOMPRESSION_IS_SUPPORTED)) + return NULL; + if (drm_dp_read_dpcd_caps(immediate_upstream_aux, dpcd_ext) < 0) return NULL; From 4a33aa34e4ac2c2bd746a305d4b39fb60dedc091 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 11 Sep 2024 13:16:48 +0300 Subject: [PATCH 02/41] drm/display: fix kerneldocs references The commit 9da7ec9b19d8 ("drm/bridge-connector: move to DRM_DISPLAY_HELPER module") changed location of the drm_bridge_connector.c file, but didn't update the kerneldocs. Fix that. Fixes: 9da7ec9b19d8 ("drm/bridge-connector: move to DRM_DISPLAY_HELPER module") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/dri-devel/20240904163018.214efaa7@canb.auug.org.au/ Acked-by: Thomas Zimmermann Reviewed-By: Carlos Bilbao Link: https://patchwork.freedesktop.org/patch/msgid/20240911-drm-fix-dbc-docs-v1-1-ae5cb82fce1e@linaro.org Signed-off-by: Dmitry Baryshkov --- Documentation/gpu/drm-kms-helpers.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 8435e8621cc0..c3e58856f75b 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -181,7 +181,7 @@ Bridge Operations Bridge Connector Helper ----------------------- -.. kernel-doc:: drivers/gpu/drm/drm_bridge_connector.c +.. kernel-doc:: drivers/gpu/drm/display/drm_bridge_connector.c :doc: overview @@ -204,7 +204,7 @@ MIPI-DSI bridge operation Bridge Connector Helper Reference --------------------------------- -.. kernel-doc:: drivers/gpu/drm/drm_bridge_connector.c +.. kernel-doc:: drivers/gpu/drm/display/drm_bridge_connector.c :export: Panel-Bridge Helper Reference From d92b90f9a54d9300a6e883258e79f36dab53bfae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 27 Aug 2024 12:45:23 +0200 Subject: [PATCH 03/41] drm/vboxvideo: Replace fake VLA at end of vbva_mouse_pointer_shape with real VLA Replace the fake VLA at end of the vbva_mouse_pointer_shape shape with a real VLA to fix a "memcpy: detected field-spanning write error" warning: [ 13.319813] memcpy: detected field-spanning write (size 16896) of single field "p->data" at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 (size 4) [ 13.319841] WARNING: CPU: 0 PID: 1105 at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 hgsmi_update_pointer_shape+0x192/0x1c0 [vboxvideo] [ 13.320038] Call Trace: [ 13.320173] hgsmi_update_pointer_shape [vboxvideo] [ 13.320184] vbox_cursor_atomic_update [vboxvideo] Note as mentioned in the added comment it seems the original length calculation for the allocated and send hgsmi buffer is 4 bytes too large. Changing this is not the goal of this patch, so this behavior is kept. Signed-off-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240827104523.17442-1-hdegoede@redhat.com --- drivers/gpu/drm/vboxvideo/hgsmi_base.c | 10 +++++++++- drivers/gpu/drm/vboxvideo/vboxvideo.h | 4 +--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vboxvideo/hgsmi_base.c b/drivers/gpu/drm/vboxvideo/hgsmi_base.c index 8c041d7ce4f1..87dccaecc3e5 100644 --- a/drivers/gpu/drm/vboxvideo/hgsmi_base.c +++ b/drivers/gpu/drm/vboxvideo/hgsmi_base.c @@ -139,7 +139,15 @@ int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags, flags |= VBOX_MOUSE_POINTER_VISIBLE; } - p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA, + /* + * The 4 extra bytes come from switching struct vbva_mouse_pointer_shape + * from having a 4 bytes fixed array at the end to using a proper VLA + * at the end. These 4 extra bytes were not subtracted from sizeof(*p) + * before the switch to the VLA, so this way the behavior is unchanged. + * Chances are these 4 extra bytes are not necessary but they are kept + * to avoid regressions. + */ + p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len + 4, HGSMI_CH_VBVA, VBVA_MOUSE_POINTER_SHAPE); if (!p) return -ENOMEM; diff --git a/drivers/gpu/drm/vboxvideo/vboxvideo.h b/drivers/gpu/drm/vboxvideo/vboxvideo.h index f60d82504da0..79ec8481de0e 100644 --- a/drivers/gpu/drm/vboxvideo/vboxvideo.h +++ b/drivers/gpu/drm/vboxvideo/vboxvideo.h @@ -351,10 +351,8 @@ struct vbva_mouse_pointer_shape { * Bytes in the gap between the AND and the XOR mask are undefined. * XOR mask scanlines have no gap between them and size of XOR mask is: * xor_len = width * 4 * height. - * - * Preallocate 4 bytes for accessing actual data as p->data. */ - u8 data[4]; + u8 data[]; } __packed; /* pointer is visible */ From cac075706f298948898b1f63e81709df42afa75d Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 23 Sep 2024 11:34:06 +0100 Subject: [PATCH 04/41] drm/panthor: Fix race when converting group handle to group object XArray provides it's own internal lock which protects the internal array when entries are being simultaneously added and removed. However there is still a race between retrieving the pointer from the XArray and incrementing the reference count. To avoid this race simply hold the internal XArray lock when incrementing the reference count, this ensures there cannot be a racing call to xa_erase(). Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Signed-off-by: Steven Price Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240923103406.2509906-1-steven.price@arm.com --- drivers/gpu/drm/panthor/panthor_sched.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 12b272a912f8..d21fe63ae228 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -3242,6 +3242,18 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) return 0; } +static struct panthor_group *group_from_handle(struct panthor_group_pool *pool, + u32 group_handle) +{ + struct panthor_group *group; + + xa_lock(&pool->xa); + group = group_get(xa_load(&pool->xa, group_handle)); + xa_unlock(&pool->xa); + + return group; +} + int panthor_group_get_state(struct panthor_file *pfile, struct drm_panthor_group_get_state *get_state) { @@ -3253,7 +3265,7 @@ int panthor_group_get_state(struct panthor_file *pfile, if (get_state->pad) return -EINVAL; - group = group_get(xa_load(&gpool->xa, get_state->group_handle)); + group = group_from_handle(gpool, get_state->group_handle); if (!group) return -EINVAL; @@ -3384,7 +3396,7 @@ panthor_job_create(struct panthor_file *pfile, job->call_info.latest_flush = qsubmit->latest_flush; INIT_LIST_HEAD(&job->node); - job->group = group_get(xa_load(&gpool->xa, group_handle)); + job->group = group_from_handle(gpool, group_handle); if (!job->group) { ret = -EINVAL; goto err_put_job; From a3096328462b1e022c6294898c440708ea11509a Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 16 Sep 2024 20:52:00 +0200 Subject: [PATCH 05/41] MAINTAINERS: drm/sched: Add new maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM's GPU scheduler is arguably in need of more intensive maintenance. Danilo and Philipp volunteer to help with the maintainership. Signed-off-by: Philipp Stanner Cc: Christian König Cc: Luben Tuikov Cc: Matthew Brost Cc: Danilo Krummrich Cc: Tvrtko Ursulin Acked-by: Christian König Acked-by: Alex Deucher Acked-by: Matthew Brost Acked-by: Danilo Krummrich Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240916185159.35727-3-pstanner@redhat.com --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 515e7f4b6cd9..71f2afae81b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7704,6 +7704,8 @@ F: drivers/gpu/drm/xlnx/ DRM GPU SCHEDULER M: Luben Tuikov M: Matthew Brost +M: Danilo Krummrich +M: Philipp Stanner L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git From 440d52b370b03b366fd26ace36bab20552116145 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 13 Sep 2024 13:23:01 -0700 Subject: [PATCH 06/41] drm/sched: Fix dynamic job-flow control race Fixes a race condition reported here: https://github.com/AsahiLinux/linux/issues/309#issuecomment-2238968609 The whole premise of lockless access to a single-producer-single- consumer queue is that there is just a single producer and single consumer. That means we can't call drm_sched_can_queue() (which is about queueing more work to the hw, not to the spsc queue) from anywhere other than the consumer (wq). This call in the producer is just an optimization to avoid scheduling the consuming worker if it cannot yet queue more work to the hw. It is safe to drop this optimization to avoid the race condition. Suggested-by: Asahi Lina Fixes: a78422e9dff3 ("drm/sched: implement dynamic job-flow control") Closes: https://github.com/AsahiLinux/linux/issues/309 Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Danilo Krummrich Tested-by: Janne Grunau Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240913202301.16772-1-robdclark@gmail.com --- drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- drivers/gpu/drm/scheduler/sched_main.c | 7 ++----- include/drm/gpu_scheduler.h | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 58c8161289fe..567e5ace6d0c 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -380,7 +380,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched, entity); + drm_sched_wakeup(entity->rq->sched); } /** @@ -612,7 +612,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup(entity->rq->sched, entity); + drm_sched_wakeup(entity->rq->sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 7e90c9f95611..a124d5e77b5e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1022,15 +1022,12 @@ EXPORT_SYMBOL(drm_sched_job_cleanup); /** * drm_sched_wakeup - Wake up the scheduler if it is ready to queue * @sched: scheduler instance - * @entity: the scheduler entity * * Wake up the scheduler if we can queue jobs. */ -void drm_sched_wakeup(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) +void drm_sched_wakeup(struct drm_gpu_scheduler *sched) { - if (drm_sched_can_queue(sched, entity)) - drm_sched_run_job_queue(sched); + drm_sched_run_job_queue(sched); } /** diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5acc64954a88..e28bc649b5c9 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -574,7 +574,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); void drm_sched_job_cleanup(struct drm_sched_job *job); -void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); +void drm_sched_wakeup(struct drm_gpu_scheduler *sched); bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); From f0fa69b5011a45394554fb8061d74fee4d7cd72c Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Tue, 27 Aug 2024 11:39:04 -0500 Subject: [PATCH 07/41] drm/connector: hdmi: Fix writing Dynamic Range Mastering infoframes The largest infoframe we create is the DRM (Dynamic Range Mastering) infoframe which is 26 bytes + a 4 byte header, for a total of 30 bytes. With HDMI_MAX_INFOFRAME_SIZE set to 29 bytes, as it is now, we allocate too little space to pack a DRM infoframe in write_device_infoframe(), leading to an ENOSPC return from hdmi_infoframe_pack(), and never calling the connector's write_infoframe() vfunc. Instead of having HDMI_MAX_INFOFRAME_SIZE defined in two places, replace HDMI_MAX_INFOFRAME_SIZE with HDMI_INFOFRAME_SIZE(MAX) and make MAX 27 bytes - which is defined by the HDMI specification to be the largest infoframe payload. Fixes: f378b77227bc ("drm/connector: hdmi: Add Infoframes generation") Fixes: c602e4959a0c ("drm/connector: hdmi: Create Infoframe DebugFS entries") Signed-off-by: Derek Foreman Acked-by: Maxime Ripard Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240827163918.48160-1-derek.foreman@collabora.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 4 +--- drivers/gpu/drm/drm_debugfs.c | 4 +--- include/linux/hdmi.h | 9 +++++++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 7854820089ec..feb7a3a75981 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -521,8 +521,6 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); -#define HDMI_MAX_INFOFRAME_SIZE 29 - static int clear_device_infoframe(struct drm_connector *connector, enum hdmi_infoframe_type type) { @@ -563,7 +561,7 @@ static int write_device_infoframe(struct drm_connector *connector, { const struct drm_connector_hdmi_funcs *funcs = connector->hdmi.funcs; struct drm_device *dev = connector->dev; - u8 buffer[HDMI_MAX_INFOFRAME_SIZE]; + u8 buffer[HDMI_INFOFRAME_SIZE(MAX)]; int ret; int len; diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 6b239a24f1df..9d3e6dd68810 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -520,8 +520,6 @@ static const struct file_operations drm_connector_fops = { .write = connector_write }; -#define HDMI_MAX_INFOFRAME_SIZE 29 - static ssize_t audio_infoframe_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -579,7 +577,7 @@ static ssize_t _f##_read_infoframe(struct file *filp, \ struct drm_connector *connector; \ union hdmi_infoframe *frame; \ struct drm_device *dev; \ - u8 buf[HDMI_MAX_INFOFRAME_SIZE]; \ + u8 buf[HDMI_INFOFRAME_SIZE(MAX)]; \ ssize_t len = 0; \ \ connector = filp->private_data; \ diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 3bb87bf6bc65..455f855bc084 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -59,6 +59,15 @@ enum hdmi_infoframe_type { #define HDMI_DRM_INFOFRAME_SIZE 26 #define HDMI_VENDOR_INFOFRAME_SIZE 4 +/* + * HDMI 1.3a table 5-14 states that the largest InfoFrame_length is 27, + * not including the packet header or checksum byte. We include the + * checksum byte in HDMI_INFOFRAME_HEADER_SIZE, so this should allow + * HDMI_INFOFRAME_SIZE(MAX) to be the largest buffer we could ever need + * for any HDMI infoframe. + */ +#define HDMI_MAX_INFOFRAME_SIZE 27 + #define HDMI_INFOFRAME_SIZE(type) \ (HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE) From 8b0d2f61545545ab5eef923ed6e59fc3be2385e0 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 23 Sep 2024 09:58:14 +0200 Subject: [PATCH 08/41] drm: Consistently use struct drm_mode_rect for FB_DAMAGE_CLIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FB_DAMAGE_CLIPS is a plane property for damage handling. Its UAPI should only use UAPI types. Hence replace struct drm_rect with struct drm_mode_rect in drm_atomic_plane_set_property(). Both types are identical in practice, so there's no change in behavior. Reported-by: Ville Syrjälä Closes: https://lore.kernel.org/dri-devel/Zu1Ke1TuThbtz15E@intel.com/ Signed-off-by: Thomas Zimmermann Fixes: d3b21767821e ("drm: Add a new plane property to send damage during plane update") Cc: Lukasz Spintzyk Cc: Deepak Rawat Cc: Daniel Vetter Cc: Thomas Hellstrom Cc: David Airlie Cc: Simona Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Cc: # v5.0+ Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240923075841.16231-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_atomic_uapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 7936c2023955..370dc676e3aa 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -543,7 +543,7 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, &state->fb_damage_clips, val, -1, - sizeof(struct drm_rect), + sizeof(struct drm_mode_rect), &replaced); return ret; } else if (property == plane->scaling_filter_property) { From ad604f0a4c040dcb8faf44dc72db25e457c28076 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 24 Sep 2024 10:41:03 +0200 Subject: [PATCH 09/41] firmware/sysfb: Disable sysfb for firmware buffers with unknown parent The sysfb framebuffer handling only operates on graphics devices that provide the system's firmware framebuffer. If that device is not known, assume that any graphics device has been initialized by firmware. Fixes a problem on i915 where sysfb does not release the firmware framebuffer after the native graphics driver loaded. Reported-by: Borah, Chaitanya Kumar Closes: https://lore.kernel.org/dri-devel/SJ1PR11MB6129EFB8CE63D1EF6D932F94B96F2@SJ1PR11MB6129.namprd11.prod.outlook.com/ Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12160 Signed-off-by: Thomas Zimmermann Fixes: b49420d6a1ae ("video/aperture: optionally match the device in sysfb_disable()") Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Cc: Helge Deller Cc: Sam Ravnborg Cc: Daniel Vetter Cc: Alex Deucher Cc: dri-devel@lists.freedesktop.org Cc: Linux regression tracking (Thorsten Leemhuis) Cc: # v6.11+ Acked-by: Alex Deucher Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240924084227.262271-1-tzimmermann@suse.de --- drivers/firmware/sysfb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 02a07d3d0d40..a3df782fa687 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -67,9 +67,11 @@ static bool sysfb_unregister(void) void sysfb_disable(struct device *dev) { struct screen_info *si = &screen_info; + struct device *parent; mutex_lock(&disable_lock); - if (!dev || dev == sysfb_parent_dev(si)) { + parent = sysfb_parent_dev(si); + if (!dev || !parent || dev == parent) { sysfb_unregister(); disabled = true; } From 4286cc2c953983d44d248c9de1c81d3a9643345c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 13 Sep 2024 17:05:52 +0100 Subject: [PATCH 10/41] drm/sched: Add locking to drm_sched_entity_modify_sched MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the locking amdgpu currently can race between amdgpu_ctx_set_entity_priority() (via drm_sched_entity_modify_sched()) and drm_sched_job_arm(), leading to the latter accesing potentially inconsitent entity->sched_list and entity->num_sched_list pair. v2: * Improve commit message. (Philipp) Signed-off-by: Tvrtko Ursulin Fixes: b37aced31eb0 ("drm/scheduler: implement a function to modify sched list") Cc: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: Philipp Stanner Cc: # v5.7+ Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240913160559.49054-2-tursulin@igalia.com Signed-off-by: Christian König --- drivers/gpu/drm/scheduler/sched_entity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 567e5ace6d0c..0e002c17fcb6 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -133,8 +133,10 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, { WARN_ON(!num_sched_list || !sched_list); + spin_lock(&entity->rq_lock); entity->sched_list = sched_list; entity->num_sched_list = num_sched_list; + spin_unlock(&entity->rq_lock); } EXPORT_SYMBOL(drm_sched_entity_modify_sched); From cbc8764e29c2318229261a679b2aafd0f9072885 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Sep 2024 11:19:08 +0100 Subject: [PATCH 11/41] drm/sched: Always wake up correct scheduler in drm_sched_entity_push_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since drm_sched_entity_modify_sched() can modify the entities run queue, lets make sure to only dereference the pointer once so both adding and waking up are guaranteed to be consistent. Alternative of moving the spin_unlock to after the wake up would for now be more problematic since the same lock is taken inside drm_sched_rq_update_fifo(). v2: * Improve commit message. (Philipp) * Cache the scheduler pointer directly. (Christian) Signed-off-by: Tvrtko Ursulin Fixes: b37aced31eb0 ("drm/scheduler: implement a function to modify sched list") Cc: Christian König Cc: Alex Deucher Cc: Luben Tuikov Cc: Matthew Brost Cc: David Airlie Cc: Daniel Vetter Cc: Philipp Stanner Cc: dri-devel@lists.freedesktop.org Cc: # v5.7+ Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240924101914.2713-3-tursulin@igalia.com Signed-off-by: Christian König --- drivers/gpu/drm/scheduler/sched_entity.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 0e002c17fcb6..a75eede8bf8d 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -599,6 +599,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; + /* Add the entity to the run queue */ spin_lock(&entity->rq_lock); if (entity->stopped) { @@ -608,13 +611,16 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) return; } - drm_sched_rq_add_entity(entity->rq, entity); + rq = entity->rq; + sched = rq->sched; + + drm_sched_rq_add_entity(rq, entity); spin_unlock(&entity->rq_lock); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) drm_sched_rq_update_fifo(entity, submit_ts); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); From 087913e0ba2b3b9d7ccbafb2acf5dab9e35ae1d5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 24 Sep 2024 11:19:09 +0100 Subject: [PATCH 12/41] drm/sched: Always increment correct scheduler score MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Entities run queue can change during drm_sched_entity_push_job() so make sure to update the score consistently. Signed-off-by: Tvrtko Ursulin Fixes: d41a39dda140 ("drm/scheduler: improve job distribution with multiple queues") Cc: Nirmoy Das Cc: Christian König Cc: Luben Tuikov Cc: Matthew Brost Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.9+ Reviewed-by: Christian König Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240924101914.2713-4-tursulin@igalia.com Signed-off-by: Christian König --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index a75eede8bf8d..b2cf3e0c1838 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -586,7 +586,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); - atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); /* @@ -614,6 +613,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) rq = entity->rq; sched = rq->sched; + atomic_inc(sched->score); drm_sched_rq_add_entity(rq, entity); spin_unlock(&entity->rq_lock); From abf201f6ce14c4ceeccde5471bdf59614b83a3d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 30 Sep 2024 15:07:49 +0200 Subject: [PATCH 13/41] drm/sched: revert "Always increment correct scheduler score" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 087913e0ba2b3b9d7ccbafb2acf5dab9e35ae1d5. It turned out that the original code was correct since the rq can only change when there is no armed job for an entity. This change here broke the logic since we only incremented the counter for the first job, so revert it. Signed-off-by: Christian König Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240930131451.536150-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index b2cf3e0c1838..a75eede8bf8d 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -586,6 +586,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); + atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); /* @@ -613,7 +614,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) rq = entity->rq; sched = rq->sched; - atomic_inc(sched->score); drm_sched_rq_add_entity(rq, entity); spin_unlock(&entity->rq_lock); From 394b52462020b6cceff1f7f47fdebd03589574f3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 18 Sep 2024 20:35:43 +0300 Subject: [PATCH 14/41] drm/i915/gem: fix bitwise and logical AND mixup CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND is an int, defaulting to 250. When the wakeref is non-zero, it's either -1 or a dynamically allocated pointer, depending on CONFIG_DRM_I915_DEBUG_RUNTIME_PM. It's likely that the code works by coincidence with the bitwise AND, but with CONFIG_DRM_I915_DEBUG_RUNTIME_PM=y, there's the off chance that the condition evaluates to false, and intel_wakeref_auto() doesn't get called. Switch to the intended logical AND. v2: Use != to avoid clang -Wconstant-logical-operand (Nathan) Fixes: ad74457a6b5a ("drm/i915/dgfx: Release mmap on rpm suspend") Cc: Matthew Auld Cc: Rodrigo Vivi Cc: Anshuman Gupta Cc: Andi Shyti Cc: Nathan Chancellor Cc: stable@vger.kernel.org # v6.1+ Reviewed-by: Matthew Auld Reviewed-by: Andi Shyti # v1 Link: https://patchwork.freedesktop.org/patch/msgid/643cc0a4d12f47fd8403d42581e83b1e9c4543c7.1726680898.git.jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 4c1bfe259ed1d2ade826f95d437e1c41b274df04) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 5c72462d1f57..b22e2019768f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1131,7 +1131,7 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(bo->resource)); } - if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND) + if (wakeref && CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND != 0) intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); From 2b55639a4e25ff02ee496368b03456bd28ebdc0b Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Fri, 20 Sep 2024 11:28:02 +0100 Subject: [PATCH 15/41] drm/panthor: Add FOP_UNSIGNED_OFFSET to fop_flags Since commit 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to fop_flags") the FMODE_UNSIGNED_OFFSET flag has been moved to fop_flags and renamed, but the patch failed to make the changes for the panthor driver. When user space opens the render node the WARN() added by the patch gets triggered. Fixes: 641bb4394f40 ("fs: move FMODE_UNSIGNED_OFFSET to fop_flags") Cc: Christian Brauner Signed-off-by: Liviu Dudau Reviewed-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Christian Brauner Tested-by: Heiko Stuebner Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20240920102802.2483367-1-liviu.dudau@arm.com --- drivers/gpu/drm/panthor/panthor_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 34182f67136c..c520f156e2d7 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -1383,6 +1383,7 @@ static const struct file_operations panthor_drm_driver_fops = { .read = drm_read, .llseek = noop_llseek, .mmap = panthor_mmap, + .fop_flags = FOP_UNSIGNED_OFFSET, }; #ifdef CONFIG_DEBUG_FS From fa998a9eac8809da4f219aad49836fcad2a9bf5c Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 13 Sep 2024 13:27:22 +0200 Subject: [PATCH 16/41] drm/panthor: Lock the VM resv before calling drm_gpuvm_bo_obtain_prealloc() drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our pre-allocated BO if the association exists. Given we only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will be called immediately, and we have to hold the VM resv lock when calling this function. Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240913112722.492144-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index bbc12728437f..3cd2bce59edc 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1251,9 +1251,17 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, goto err_cleanup; } + /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our + * pre-allocated BO if the association exists. Given we + * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will + * be called immediately, and we have to hold the VM resv lock when + * calling this function. + */ + dma_resv_lock(panthor_vm_resv(vm), NULL); mutex_lock(&bo->gpuva_list_lock); op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); mutex_unlock(&bo->gpuva_list_lock); + dma_resv_unlock(panthor_vm_resv(vm)); /* If the a vm_bo for this combination exists, it already * retains a pin ref, and we can release the one we took earlier. From 282864cc5d3f144af0cdea1868ee2dc2c5110f0d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 30 Sep 2024 18:37:42 +0200 Subject: [PATCH 17/41] drm/panthor: Fix access to uninitialized variable in tick_ctx_cleanup() The group variable can't be used to retrieve ptdev in our second loop, because it points to the previously iterated list_head, not a valid group. Get the ptdev object from the scheduler instead. Cc: Fixes: d72f049087d4 ("drm/panthor: Allow driver compilation") Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202409302306.UDikqa03-lkp@intel.com/ Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240930163742.87036-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_sched.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index a8a939a9fb51..145d983bb129 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2046,6 +2046,7 @@ static void tick_ctx_cleanup(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) { + struct panthor_device *ptdev = sched->ptdev; struct panthor_group *group, *tmp; u32 i; @@ -2054,7 +2055,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched, /* If everything went fine, we should only have groups * to be terminated in the old_groups lists. */ - drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && + drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask && group_can_run(group)); if (!group_can_run(group)) { @@ -2077,7 +2078,7 @@ tick_ctx_cleanup(struct panthor_scheduler *sched, /* If everything went fine, the groups to schedule lists should * be empty. */ - drm_WARN_ON(&group->ptdev->base, + drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { From 7a1f30afe97294281a2ba05977688385744f9844 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Sep 2024 09:19:14 +0200 Subject: [PATCH 18/41] drm/panthor: Don't declare a queue blocked if deferred operations are pending If deferred operations are pending, we want to wait for those to land before declaring the queue blocked on a SYNC_WAIT. We need this to deal with the case where the sync object is signalled through a deferred SYNC_{ADD,SET} from the same queue. If we don't do that and the group gets scheduled out before the deferred SYNC_{SET,ADD} is executed, we'll end up with a timeout, because no external SYNC_{SET,ADD} will make the scheduler reconsider the group for execution. Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Cc: Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240905071914.3278599-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_sched.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 145d983bb129..2aff02ba6949 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -1103,7 +1103,13 @@ cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs list_move_tail(&group->wait_node, &group->ptdev->scheduler->groups.waiting); } - group->blocked_queues |= BIT(cs_id); + + /* The queue is only blocked if there's no deferred operation + * pending, which can be checked through the scoreboard status. + */ + if (!cs_iface->output->status_scoreboards) + group->blocked_queues |= BIT(cs_id); + queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; queue->syncwait.ref = cs_iface->output->status_wait_sync_value; status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; From f9e7ac6e2e9986c2ee63224992cb5c8276e46b2a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Sep 2024 09:01:54 +0200 Subject: [PATCH 19/41] drm/panthor: Don't add write fences to the shared BOs The only user (the mesa gallium driver) is already assuming explicit synchronization and doing the export/import dance on shared BOs. The only reason we were registering ourselves as writers on external BOs is because Xe, which was the reference back when we developed Panthor, was doing so. Turns out Xe was wrong, and we really want bookkeep on all registered fences, so userspace can explicitly upgrade those to read/write when needed. Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block") Cc: Matthew Brost Cc: Simona Vetter Cc: Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://patchwork.freedesktop.org/patch/msgid/20240905070155.3254011-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_sched.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 2aff02ba6949..aee362abb710 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -3443,13 +3443,8 @@ void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched { struct panthor_job *job = container_of(sched_job, struct panthor_job, base); - /* Still not sure why we want USAGE_WRITE for external objects, since I - * was assuming this would be handled through explicit syncs being imported - * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers - * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. - */ panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); } void panthor_sched_unplug(struct panthor_device *ptdev) From 0d5e5e8a0aa49ea2163abf128da3b509a6c58286 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Sat, 7 Sep 2024 13:25:19 -0400 Subject: [PATCH 20/41] drm/amd/display: update DML2 policy EnhancedPrefetchScheduleAccelerationFinal DCN35 [WHY & HOW] Mismatch in DCN35 DML2 cause bw validation failed to acquire unexpected DPP pipe to cause grey screen and system hang. Remove EnhancedPrefetchScheduleAccelerationFinal value override to match HW spec. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Charlene Liu Signed-off-by: Yihan Zhu Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9dad21f910fcea2bdcff4af46159101d7f9cd8ba) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c index c4c52173ef22..11c904ae2958 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c @@ -303,7 +303,6 @@ void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_m if (project == dml_project_dcn35 || project == dml_project_dcn351) { policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; - policy->EnhancedPrefetchScheduleAccelerationFinal = 0; policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ policy->UseOnlyMaxPrefetchModes = 1; } From d51160ab00969ee6758ed2dcbc0f81dd476a181c Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 4 Sep 2024 15:29:24 -0400 Subject: [PATCH 21/41] drm/amd/display: Restore Optimized pbn Value if Failed to Disable DSC Existing last step of dsc policy is to restore pbn value under minimum compression when try to greedily disable dsc for a stream failed to fit in MST bw. Optimized dsc params result from optimization step is not necessarily the minimum compression, therefore it is not correct to restore the pbn under minimum compression rate. Restore the pbn under minimum compression instead of the value from optimized pbn could result in the dsc params not correct at the modeset where atomic_check failed due to not enough bw. One or more monitors connected could not light up in such case. Restore the optimized pbn value, instead of using the pbn value under minimum compression. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 352c3165d2b75030169e012461a16bcf97f392fc) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 83a31b97e96b..a08e8a0b696c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1027,6 +1027,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int remaining_to_try = 0; int ret; uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); + int var_pbn; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1057,13 +1058,18 @@ static int try_disable_dsc(struct drm_atomic_state *state, break; DRM_DEBUG_DRIVER("MST_DSC index #%d, try no compression\n", next_index); + var_pbn = vars[next_index].pbn; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, vars[next_index].pbn); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); + vars[next_index].pbn = var_pbn; return ret; + } ret = drm_dp_mst_atomic_check(state); if (ret == 0) { @@ -1071,14 +1077,17 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - DRM_DEBUG_DRIVER("MST_DSC index #%d, restore minimum compression\n", next_index); - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps, fec_overhead_multiplier_x1000); + DRM_DEBUG_DRIVER("MST_DSC index #%d, restore optimized pbn value\n", next_index); + vars[next_index].pbn = var_pbn; ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, vars[next_index].pbn); - if (ret < 0) + if (ret < 0) { + DRM_DEBUG_DRIVER("%s:%d MST_DSC index #%d, failed to set pbn to the state, %d\n", + __func__, __LINE__, next_index, ret); return ret; + } } tried[next_index] = true; From c36df0f5f5e5acec5d78f23c4725cc500df28843 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 11 Sep 2024 19:45:09 -0400 Subject: [PATCH 22/41] drm/amd/display: avoid set dispclk to 0 [why] set dispclk to 0 cause stability issue. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1c6b16ebf5eb2bc5740be9e37b3a69f1dfe1dded) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index da9101b83e8c..70abd32ce2ad 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -766,6 +766,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmub_reallow_idle = false, .static_screen_wait_frames = 2, .notify_dpia_hr_bw = true, + .min_disp_clk_khz = 50000, }; static const struct dc_panel_config panel_config_defaults = { From 05af800704ee7187d9edd461ec90f3679b1c4aba Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 6 Sep 2024 11:39:18 -0600 Subject: [PATCH 23/41] drm/amd/display: Add HDR workaround for specific eDP [WHY & HOW] Some eDP panels suffer from flicking when HDR is enabled in KDE. This quirk works around it by skipping VSC that is incompatible with eDP panels. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3151 Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 4d4257280d7957727998ef90ccc7b69c7cca8376) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 ++++++++++- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6e79028c5d78..7a826f20c38a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6735,12 +6735,21 @@ create_stream_for_sink(struct drm_connector *connector, if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || stream->signal == SIGNAL_TYPE_EDP) { + const struct dc_edid_caps *edid_caps; + unsigned int disable_colorimetry = 0; + + if (aconnector->dc_sink) { + edid_caps = &aconnector->dc_sink->edid_caps; + disable_colorimetry = edid_caps->panel_patch.disable_colorimetry; + } + // // should decide stream support vsc sdp colorimetry capability // before building vsc info packet // stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && - stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED; + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED && + !disable_colorimetry; if (stream->out_transfer_func.tf == TRANSFER_FUNCTION_GAMMA22) tf = TRANSFER_FUNC_GAMMA_22; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 50109d13d967..069e0195e50a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -73,6 +73,10 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + DRM_DEBUG_DRIVER("Disabling VSC on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.disable_colorimetry = true; + break; default: return; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fd6dca735714..6d7989b751e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -178,6 +178,7 @@ struct dc_panel_patch { unsigned int skip_avmute; unsigned int mst_start_top_delay; unsigned int remove_sink_ext_caps; + unsigned int disable_colorimetry; }; struct dc_edid_caps { From ef785ca7f7c80891580cafd36c8dd86375684310 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 11 Sep 2024 17:27:08 -0400 Subject: [PATCH 24/41] drm/amd/display: Enable idle workqueue for more IPS modes [Why] There are more IPS modes other than DMUB_IPS_ENABLE that enables IPS. We need to enable the hotplug detect idle workqueue for those modes as well. [How] Modify the if condition to initialize the workqueue in all IPS modes except for DMUB_IPS_DISABLE_ALL. Fixes: 65444581a4ae ("drm/amd/display: Determine IPS mode by ASIC and PMFW versions") Signed-off-by: Leo Li Reviewed-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 181db30bcfed097ecc680539b1eabe935c11f57f) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7a826f20c38a..5109146ef2ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2026,7 +2026,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n"); } - if (adev->dm.dc->caps.ips_support && adev->dm.dc->config.disable_ips == DMUB_IPS_ENABLE) + if (adev->dm.dc->caps.ips_support && + adev->dm.dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) adev->dm.idle_workqueue = idle_create_workqueue(adev); if (adev->dm.dc->caps.max_links > 0 && adev->family >= AMDGPU_FAMILY_RV) { From 52d4e3fb3d340447dcdac0e14ff21a764f326907 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 13 Sep 2024 15:44:40 +0800 Subject: [PATCH 25/41] drm/amd/display: Fix system hang while resume with TBT monitor [Why] Connected with a Thunderbolt monitor and do the suspend and the system may hang while resume. The TBT monitor HPD will be triggered during the resume procedure and call the drm_client_modeset_probe() while struct drm_connector connector->dev->master is NULL. It will mess up the pipe topology after resume. [How] Skip the TBT monitor HPD during the resume procedure because we currently will probe the connectors after resume by default. Reviewed-by: Wayne Lin Signed-off-by: Tom Chung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 453f86a26945207a16b8f66aaed5962dc2b95b85) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5109146ef2ba..60c617fcc97e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -770,6 +770,12 @@ static void dmub_hpd_callback(struct amdgpu_device *adev, return; } + /* Skip DMUB HPD IRQ in suspend/resume. We will probe them later. */ + if (notify->type == DMUB_NOTIFICATION_HPD && adev->in_suspend) { + DRM_INFO("Skip DMUB HPD IRQ callback in suspend/resume\n"); + return; + } + link_index = notify->link_index; link = adev->dm.dc->links[link_index]; dev = adev->dm.ddev; From cffa8e83df9fe525afad1e1099097413f9174f57 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Sep 2024 17:45:07 -0400 Subject: [PATCH 26/41] drm/xe: Restore pci state upon resume The pci state was saved, but not restored. Restore right after the power state transition request like every other driver. v2: Use right fixes tag, since this was there initialy, but accidentally removed. Fixes: f6761c68c0ac ("drm/xe/display: Improve s2idle handling.") Cc: Maarten Lankhorst Cc: Lucas De Marchi Reviewed-by: Jonathan Cavitt Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240912214507.456897-1-rodrigo.vivi@intel.com Signed-off-by: Maarten Lankhorst (cherry picked from commit ec2d1539e159f53eae708e194c449cfefa004994) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 937c3e064f0d..5e962e72c97e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -924,6 +924,8 @@ static int xe_pci_resume(struct device *dev) if (err) return err; + pci_restore_state(pdev); + err = pci_enable_device(pdev); if (err) return err; From 790533e44bfc7af929842fccd9674c9f424d4627 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 24 Sep 2024 16:09:48 +0100 Subject: [PATCH 27/41] drm/xe/guc_submit: add missing locking in wedged_fini Any non-wedged queue can have a zero refcount here and can be running concurrently with an async queue destroy, therefore dereferencing the queue ptr to check wedge status after the lookup can trigger UAF if queue is not wedged. Fix this by keeping the submission_state lock held around the check to postpone the free and make the check safe, before dropping again around the put() to avoid the deadlock. Fixes: 8ed9aaae39f3 ("drm/xe: Force wedged state and block GT reset upon any GPU hang") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240924150947.118433-2-matthew.auld@intel.com (cherry picked from commit d28af0b6b9580b9f90c265a7da0315b0ad20bbfd) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fbbe6a487bbb..715c761dc7d6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -290,9 +290,15 @@ static void guc_submit_wedged_fini(void *arg) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; From 2d2be279f1ca9e7288282d4214f16eea8a727cdb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 23 Sep 2024 15:56:48 +0100 Subject: [PATCH 28/41] drm/xe: fix UAF around queue destruction We currently do stuff like queuing the final destruction step on a random system wq, which will outlive the driver instance. With bad timing we can teardown the driver with one or more work workqueue still being alive leading to various UAF splats. Add a fini step to ensure user queues are properly torn down. At this point GuC should already be nuked so queue itself should no longer be referenced from hw pov. v2 (Matt B) - Looks much safer to use a waitqueue and then just wait for the xa_array to become empty before triggering the drain. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2317 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240923145647.77707-2-matthew.auld@intel.com (cherry picked from commit 861108666cc0e999cffeab6aff17b662e68774e3) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 6 +++++- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_guc_submit.c | 26 +++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_guc_types.h | 2 ++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 70d4e4d46c3c..74e593caf87c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -298,6 +298,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); + if (xe->destroy_wq) + destroy_workqueue(xe->destroy_wq); + ttm_device_fini(&xe->ttm); } @@ -363,8 +366,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); if (!xe->ordered_wq || !xe->unordered_wq || - !xe->preempt_fence_wq) { + !xe->preempt_fence_wq || !xe->destroy_wq) { /* * Cleanup done in xe_device_destroy via * drmm_add_action_or_reset register above diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ec7eb7811126..24c8c2d20676 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -396,6 +396,9 @@ struct xe_device { /** @unordered_wq: used to serialize unordered work, mostly display */ struct workqueue_struct *unordered_wq; + /** @destroy_wq: used to serialize user destroy work, like queue */ + struct workqueue_struct *destroy_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 715c761dc7d6..98a6a385a796 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) } #endif +static void xe_guc_submit_fini(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); +} + static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + xe_guc_submit_fini(guc); xa_destroy(&guc->submission_state.exec_queue_lookup); free_submit_wq(guc); } @@ -351,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) xa_init(&guc->submission_state.exec_queue_lookup); + init_waitqueue_head(&guc->submission_state.fini_wq); + primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -367,6 +385,9 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1274,13 +1295,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { + struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 546ac6350a31..69046f698271 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -81,6 +81,8 @@ struct xe_guc { #endif /** @submission_state.enabled: submission is enabled */ bool enabled; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; } submission_state; /** @hwconfig: Hardware config state */ struct { From cb58977016d1b25781743e5fbe6a545493785e37 Mon Sep 17 00:00:00 2001 From: He Lugang Date: Wed, 11 Sep 2024 18:22:15 +0800 Subject: [PATCH 29/41] drm/xe: use devm_add_action_or_reset() helper Use devm_add_action_or_reset() to release resources in case of failure, because the cleanup function will be automatically called. Reviewed-by: Rodrigo Vivi Signed-off-by: He Lugang Link: https://patchwork.freedesktop.org/patch/msgid/9631BC17D1E028A2+20240911102215.84865-1-helugang@uniontech.com Signed-off-by: Rodrigo Vivi (cherry picked from commit fdc81c43f0c14ace6383024a02585e3fcbd1ceba) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_freq.c | 4 ++-- drivers/gpu/drm/xe/xe_gt_sysfs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 68a5778b4319..ab76973f3e1e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt) if (!gt->freq) return -ENOMEM; - err = devm_add_action(xe->drm.dev, freq_fini, gt->freq); + err = sysfs_create_files(gt->freq, freq_attrs); if (err) return err; - err = sysfs_create_files(gt->freq, freq_attrs); + err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index a05c3699e8b9..ec2b8246204b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt) gt->sysfs = &kg->base; - return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt); + return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt); } From d1ef967126e295d36201e79ec64efdba31710353 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Sep 2024 22:44:36 -0700 Subject: [PATCH 30/41] drm/xe: Convert to USM lock to rwsem Remove contention from GPU fault path for ASID->VM lookup. Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240918054436.1971839-1-matthew.brost@intel.com (cherry picked from commit 1378c633a3fbfeb344c486ffda0e920a21e62712) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 4 +--- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 8 ++++---- drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 74e593caf87c..fb7ac06aeef8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -339,9 +339,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - err = drmm_mutex_init(&xe->drm, &xe->usm.lock); - if (err) - goto err; + init_rwsem(&xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 24c8c2d20676..00e370dcf4a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -369,7 +369,7 @@ struct xe_device { /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; /** @usm.lock: protects UM state */ - struct mutex lock; + struct rw_semaphore lock; } usm; /** @pinned: pinned BO state */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 00af059a8971..5c3af2bb5402 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -198,13 +198,13 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) return -EFAULT; /* ASID to VM */ - mutex_lock(&xe->usm.lock); + down_read(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, pf->asid); if (vm && xe_vm_in_fault_mode(vm)) xe_vm_get(vm); else vm = NULL; - mutex_unlock(&xe->usm.lock); + up_read(&xe->usm.lock); if (!vm) return -EINVAL; @@ -549,11 +549,11 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) return -EINVAL; /* ASID to VM */ - mutex_lock(&xe->usm.lock); + down_read(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, acc->asid); if (vm) xe_vm_get(vm); - mutex_unlock(&xe->usm.lock); + up_read(&xe->usm.lock); if (!vm || !xe_vm_in_fault_mode(vm)) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7acd5fc9d032..a3d7cb7cfd22 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); if (vm->usm.asid) { void *lookup; @@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); } - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); @@ -1772,11 +1772,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, goto err_close_and_put; if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); if (err < 0) goto err_free_id; From 0f18ac78aa974660a948dafcc45f4dc6e2c5858d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 18 Sep 2024 09:05:03 -0700 Subject: [PATCH 31/41] drm/xe: Use helper for ASID -> VM in GPU faults and access counters Normalize both code paths with a helper. Fixes a possible leak access counter path too. Suggested-by: Matthew Auld Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240918160503.2021315-1-matthew.brost@intel.com (cherry picked from commit dc0dce6d63d22e8319e27b6a41be7368376f9471) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 39 +++++++++++++++------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 5c3af2bb5402..79c426dc2505 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -185,6 +185,21 @@ unlock_dma_resv: return err; } +static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); @@ -197,16 +212,9 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) if (pf->trva_fault) return -EFAULT; - /* ASID to VM */ - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - up_read(&xe->usm.lock); - if (!vm) - return -EINVAL; + vm = asid_to_vm(xe, pf->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); /* * TODO: Change to read lock? Using write lock for simplicity. @@ -548,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) if (acc->access_type != ACC_TRIGGER) return -EINVAL; - /* ASID to VM */ - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, acc->asid); - if (vm) - xe_vm_get(vm); - up_read(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) - return -EINVAL; + vm = asid_to_vm(xe, acc->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); down_read(&vm->lock); From 7929ffce0f8b9c76cb5c2a67d1966beaed20ab61 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:15 -0300 Subject: [PATCH 32/41] drm/xe/mcr: Use Xe2_LPM steering tables for Xe2_HPM According to Bspec, Xe2 steering tables must be used for Xe2_HPM, just as it is with Xe2_LPM. Update our driver to reflect that. Bspec: 71186 Reviewed-by: Matt Roper Signed-off-by: Gustavo Sousa Reviewed-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-2-gustavo.sousa@intel.com (cherry picked from commit 21ae035ae5c33ef176f4062bd9d4aa973dde240b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7d7bd0be6233..c834f64b0178 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -439,7 +439,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - if (MEDIA_VER(xe) >= 20) { + if (MEDIA_VERx100(xe) >= 1301) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; } else { From 3bf90935aafc750c838c8831e96c3ac36cfd48d5 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:16 -0300 Subject: [PATCH 33/41] drm/xe/xe2: Extend performance tuning to media GT With exception of "Tuning: L3 cache - media", we are currently applying recommended performance tuning settings only for the primary GT. Let's also implement them for the media GT when applicable. According to our spec, media GT registers CCCHKNREG1 and L3SQCREG* exist only in Xe2_LPM and their offsets do not match their primary GT counterparts. Furthermore, the range where CCCHKNREG1 belongs is not listed as a multicast range on the media GT. As such, we need to have Xe2_LPM-specific definitions for those registers and apply the setting only for that specific IP. Both Xe2_HPM and Xe2_LPM contain STATELESS_COMPRESSION_CTRL and the offset on the media GT matches the one on the primary one. So we can simply have a copy of "Tuning: Stateless compression control" for the media GT. v2: - Fix implementation with respect to multicast vs non-multicast registers. (Matt) - Add missing XE2LPM_CCCHKNREG1 on second action of "Tuning: Compression Overfetch - media". v3: - STATELESS_COMPRESSION_CTRL on Xe2_HPM is also a multicast register, do not define a XE2HPM_STATELESS_COMPRESSION_CTRL register. (Tejas) Bspec: 72161 Cc: Matt Roper Reviewed-by: Tejas Upadhyay Signed-off-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-3-gustavo.sousa@intel.com (cherry picked from commit e1f813947ccf2326cfda4558b7d31430d7860c4b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++++++ drivers/gpu/drm/xe/xe_tuning.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 660ff42e45a6..5a1d4639e916 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -169,6 +169,8 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) + #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -391,6 +393,10 @@ #define SCRATCH1LPFC XE_REG(0xb474) #define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) +#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) + +#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index faa1bf42e50e..c798ae1b3f75 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -42,20 +42,40 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, + { XE_RTP_NAME("Tuning: Compression Overfetch - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), + SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) + }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) + }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, { XE_RTP_NAME("Tuning: Stateless compression control"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: Stateless compression control - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + {} }; From 6ef5a04221aaeb858d1a825b2ecb7e200cac80f8 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:18 -0300 Subject: [PATCH 34/41] drm/xe/xe2: Add performance tuning for L3 cache flushing A recommended performance tuning for LNL related to L3 cache flushing was recently introduced in Bspec. Implement it. Unlike the other existing tuning settings, we limit this one for LNL only, since there is no info about whether this would be applicable to other platforms yet. In the future we can come back and use IP version ranges if applicable. v2: - Fix reference to Bspec. (Sai Teja, Tejas) - Use correct register name for "Tuning: L3 RW flush all Cache". (Sai Teja) - Use SCRATCH3_LBCF (with the underscore) for better readability. v3: - Limit setting to LNL only. (Matt) Bspec: 72161 Cc: Sai Teja Pottumuttu Cc: Tejas Upadhyay Cc: Matt Roper Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-5-gustavo.sousa@intel.com (cherry picked from commit 876253165f3eaaacacb8c8bed16a9df4b6081479) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_tuning.c | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5a1d4639e916..ac9c437e103d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -380,6 +380,9 @@ #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) +#define SCRATCH3_LBCF XE_REG_MCR(0xb154) +#define RWFLUSHALLEN REG_BIT(17) + #define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) @@ -397,6 +400,8 @@ #define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) +#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index c798ae1b3f75..0d5e04158917 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -75,6 +75,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) + }, {} }; From 1b30f87e088b499eb74298db256da5c98e8276e2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 16:59:19 -0700 Subject: [PATCH 35/41] drm/xe: Resume TDR after GT reset Not starting the TDR after GT reset on exec queue which have been restarted can lead to jobs being able to be run forever. Fix this by restarting the TDR. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240724235919.1917216-1-matthew.brost@intel.com (cherry picked from commit 8ec5a4e5ce97d6ee9f5eb5b4ce4cfc831976fdec) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 5 +++++ drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index c518d1d16d82..50361b4638f9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) +{ + drm_sched_resume_timeout(&sched->base, sched->base.timeout); +} + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index cee9c6809fc0..5ad5629a6c60 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 98a6a385a796..80062e1d3f66 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1826,6 +1826,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) } xe_sched_submission_start(sched); + xe_sched_submission_resume_tdr(sched); } int xe_guc_submit_start(struct xe_guc *guc) From 9e3c85ddea7a473ed57b6cdfef2dfd468356fc91 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 20 Sep 2024 18:17:12 -0700 Subject: [PATCH 36/41] drm/xe: Clean up VM / exec queue file lock usage. Both the VM / exec queue file lock protect the lookup and reference to the object, nothing more. These locks are not intended anything else underneath them. XA have their own locking too, so no need to take the VM / exec queue file lock aside from when doing a lookup and reference get. Add some kernel doc to make this clear and cleanup a few typos too. Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240921011712.2681510-1-matthew.brost@intel.com (cherry picked from commit fe4f5d4b661666a45b48fe7f95443f8fefc09c8c) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 -- drivers/gpu/drm/xe/xe_device_types.h | 14 +++++++++++--- drivers/gpu/drm/xe/xe_drm_client.c | 9 ++++++++- drivers/gpu/drm/xe/xe_exec_queue.c | 2 -- drivers/gpu/drm/xe/xe_vm.c | 4 ---- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index fb7ac06aeef8..5a63d135ba96 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -171,10 +171,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); - mutex_unlock(&xef->vm.lock); xe_file_put(xef); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 00e370dcf4a9..09d731a9125c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -570,15 +570,23 @@ struct xe_file { struct { /** @vm.xe: xarray to store VMs */ struct xarray xa; - /** @vm.lock: protects file VM state */ + /** + * @vm.lock: Protects VM lookup + reference and removal a from + * file xarray. Not an intended to be an outer lock which does + * thing while being held. + */ struct mutex lock; } vm; /** @exec_queue: Submission exec queue state for file */ struct { - /** @exec_queue.xe: xarray to store engines */ + /** @exec_queue.xa: xarray to store exece queues */ struct xarray xa; - /** @exec_queue.lock: protects file engine state */ + /** + * @exec_queue.lock: Protects exec queue lookup + reference and + * removal a frommfile xarray. Not an intended to be an outer + * lock which does thing while being held. + */ struct mutex lock; } exec_queue; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index c4add8b38bbd..fb52a23e28f8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -283,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7f28b7fc68d5..7743ebdcbf4b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -635,9 +635,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); if (err) goto kill_exec_queue; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a3d7cb7cfd22..31fe31db3fdc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1765,9 +1765,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); if (err) goto err_close_and_put; @@ -1799,9 +1797,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return 0; err_free_id: - mutex_lock(&xef->vm.lock); xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); err_close_and_put: xe_vm_close_and_put(vm); From 74231870cf4976f69e83aa24f48edb16619f652f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Sep 2024 08:14:27 +0100 Subject: [PATCH 37/41] drm/xe/vm: move xa_alloc to prevent UAF Evil user can guess the next id of the vm before the ioctl completes and then call vm destroy ioctl to trigger UAF since create ioctl is still referencing the same vm. Move the xa_alloc all the way to the end to prevent this. v2: - Rebase Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240925071426.144015-3-matthew.auld@intel.com (cherry picked from commit dcfd3971327f3ee92765154baebbaece833d3ca9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 31fe31db3fdc..ce9dca4d4e87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1765,10 +1765,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - if (err) - goto err_close_and_put; - if (xe->info.has_asid) { down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, @@ -1776,12 +1772,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, &xe->usm.next_asid, GFP_KERNEL); up_write(&xe->usm.lock); if (err < 0) - goto err_free_id; + goto err_close_and_put; vm->usm.asid = asid; } - args->vm_id = id; vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ @@ -1794,10 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif + /* user id alloc must always be last in ioctl to prevent UAF */ + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_close_and_put; + + args->vm_id = id; + return 0; -err_free_id: - xa_erase(&xef->vm.xa, id); err_close_and_put: xe_vm_close_and_put(vm); From 67801fa67b94ebd0e4da7a77ac2d9f321b75fbe0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Sep 2024 08:14:28 +0100 Subject: [PATCH 38/41] drm/xe/queue: move xa_alloc to prevent UAF Evil user can guess the next id of the queue before the ioctl completes and then call queue destroy ioctl to trigger UAF since create ioctl is still referencing the same queue. Move the xa_alloc all the way to the end to prevent this. v2: - Rebase Fixes: 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240925071426.144015-4-matthew.auld@intel.com (cherry picked from commit 16536582ddbebdbdf9e1d7af321bbba2bf955a87) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7743ebdcbf4b..d098d2dd1b2d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -635,12 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } + q->xef = xe_file_get(xef); + + /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); if (err) goto kill_exec_queue; args->exec_queue_id = id; - q->xef = xe_file_get(xef); return 0; From 8135f1c09dd2eecee7cb637f7ec9a29e57300eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 24 Sep 2024 14:37:13 -0700 Subject: [PATCH 39/41] drm/xe/oa: Don't reset OAC_CONTEXT_ENABLE on OA stream close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa testing on Xe2+ revealed that when OA metrics are collected for an exec_queue, after the OA stream is closed, future batch buffers submitted on that exec_queue do not complete. Not resetting OAC_CONTEXT_ENABLE on OA stream close resolves these hangs and should not have any adverse effects. v2: Make the change that we don't reset the bit clearer (Ashutosh) Also make the same fix for OAC as OAR (Ashutosh) Bspec: 60314 Fixes: 2f4a730fcd2d ("drm/xe/oa: Add OAR support") Fixes: 14e077f8006d ("drm/xe/oa: Add OAC support") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2821 Signed-off-by: José Roberto de Souza Signed-off-by: Ashutosh Dixit Cc: stable@vger.kernel.org Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240924213713.3497992-1-ashutosh.dixit@intel.com (cherry picked from commit 0c8650b09a365f4a31fca1d1d1e9d99c56071128) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_oa.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eae38a49ee8e..2804f14f8f29 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -709,8 +709,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), }, }; struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; @@ -742,10 +741,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? CTX_CTRL_RUN_ALONE : 0), + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; From 7257d9c9a3c6cfe26c428e9b7ae21d61f2f55a79 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Fri, 27 Sep 2024 09:13:08 -0700 Subject: [PATCH 40/41] drm/xe: Prevent null pointer access in xe_migrate_copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_migrate_copy designed to copy content of TTM resources. When source resource is null, it will trigger a NULL pointer dereference in xe_migrate_copy. To avoid this situation, update lacks source flag to true for this case, the flag will trigger xe_migrate_clear rather than xe_migrate_copy. Issue trace: <7> [317.089847] xe 0000:00:02.0: [drm:xe_migrate_copy [xe]] Pass 14, sizes: 4194304 & 4194304 <7> [317.089945] xe 0000:00:02.0: [drm:xe_migrate_copy [xe]] Pass 15, sizes: 4194304 & 4194304 <1> [317.128055] BUG: kernel NULL pointer dereference, address: 0000000000000010 <1> [317.128064] #PF: supervisor read access in kernel mode <1> [317.128066] #PF: error_code(0x0000) - not-present page <6> [317.128069] PGD 0 P4D 0 <4> [317.128071] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI <4> [317.128074] CPU: 1 UID: 0 PID: 1440 Comm: kunit_try_catch Tainted: G U N 6.11.0-rc7-xe #1 <4> [317.128078] Tainted: [U]=USER, [N]=TEST <4> [317.128080] Hardware name: Intel Corporation Lunar Lake Client Platform/LNL-M LP5 RVP1, BIOS LNLMFWI1.R00.3221.D80.2407291239 07/29/2024 <4> [317.128082] RIP: 0010:xe_migrate_copy+0x66/0x13e0 [xe] <4> [317.128158] Code: 00 00 48 89 8d e0 fe ff ff 48 8b 40 10 4c 89 85 c8 fe ff ff 44 88 8d bd fe ff ff 65 48 8b 3c 25 28 00 00 00 48 89 7d d0 31 ff <8b> 79 10 48 89 85 a0 fe ff ff 48 8b 00 48 89 b5 d8 fe ff ff 83 ff <4> [317.128162] RSP: 0018:ffffc9000167f9f0 EFLAGS: 00010246 <4> [317.128164] RAX: ffff8881120d8028 RBX: ffff88814d070428 RCX: 0000000000000000 <4> [317.128166] RDX: ffff88813cb99c00 RSI: 0000000004000000 RDI: 0000000000000000 <4> [317.128168] RBP: ffffc9000167fbb8 R08: ffff88814e7b1f08 R09: 0000000000000001 <4> [317.128170] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88814e7b1f08 <4> [317.128172] R13: ffff88814e7b1f08 R14: ffff88813cb99c00 R15: 0000000000000001 <4> [317.128174] FS: 0000000000000000(0000) GS:ffff88846f280000(0000) knlGS:0000000000000000 <4> [317.128176] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [317.128178] CR2: 0000000000000010 CR3: 000000011f676004 CR4: 0000000000770ef0 <4> [317.128180] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4> [317.128182] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 <4> [317.128184] PKRU: 55555554 <4> [317.128185] Call Trace: <4> [317.128187] <4> [317.128189] ? show_regs+0x67/0x70 <4> [317.128194] ? __die_body+0x20/0x70 <4> [317.128196] ? __die+0x2b/0x40 <4> [317.128198] ? page_fault_oops+0x15f/0x4e0 <4> [317.128203] ? do_user_addr_fault+0x3fb/0x970 <4> [317.128205] ? lock_acquire+0xc7/0x2e0 <4> [317.128209] ? exc_page_fault+0x87/0x2b0 <4> [317.128212] ? asm_exc_page_fault+0x27/0x30 <4> [317.128216] ? xe_migrate_copy+0x66/0x13e0 [xe] <4> [317.128263] ? __lock_acquire+0xb9d/0x26f0 <4> [317.128265] ? __lock_acquire+0xb9d/0x26f0 <4> [317.128267] ? sg_free_append_table+0x20/0x80 <4> [317.128271] ? lock_acquire+0xc7/0x2e0 <4> [317.128273] ? mark_held_locks+0x4d/0x80 <4> [317.128275] ? trace_hardirqs_on+0x1e/0xd0 <4> [317.128278] ? _raw_spin_unlock_irqrestore+0x31/0x60 <4> [317.128281] ? __pm_runtime_resume+0x60/0xa0 <4> [317.128284] xe_bo_move+0x682/0xc50 [xe] <4> [317.128315] ? lock_is_held_type+0xaa/0x120 <4> [317.128318] ttm_bo_handle_move_mem+0xe5/0x1a0 [ttm] <4> [317.128324] ttm_bo_validate+0xd1/0x1a0 [ttm] <4> [317.128328] shrink_test_run_device+0x721/0xc10 [xe] <4> [317.128360] ? find_held_lock+0x31/0x90 <4> [317.128363] ? lock_release+0xd1/0x2a0 <4> [317.128365] ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [kunit] <4> [317.128370] xe_bo_shrink_kunit+0x11/0x20 [xe] <4> [317.128397] kunit_try_run_case+0x6e/0x150 [kunit] <4> [317.128400] ? trace_hardirqs_on+0x1e/0xd0 <4> [317.128402] ? _raw_spin_unlock_irqrestore+0x31/0x60 <4> [317.128404] kunit_generic_run_threadfn_adapter+0x1e/0x40 [kunit] <4> [317.128407] kthread+0xf5/0x130 <4> [317.128410] ? __pfx_kthread+0x10/0x10 <4> [317.128412] ret_from_fork+0x39/0x60 <4> [317.128415] ? __pfx_kthread+0x10/0x10 <4> [317.128416] ret_from_fork_asm+0x1a/0x30 <4> [317.128420] Fixes: 266c85885263 ("drm/xe/xe2: Handle flat ccs move for igfx.") Signed-off-by: Zhanjun Dong Reviewed-by: Thomas Hellström Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240927161308.862323-2-zhanjun.dong@intel.com (cherry picked from commit 59a1c9c7e1d02b43b415ea92627ce095b7c79e47) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f379df3a12bf..e5f51fd23c65 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -680,8 +680,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : - (!mem_type_is_vram(old_mem_type) && !tt_has_data); + move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data)); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); From a6f3b2527375c786f2eff77d3ee8b805bcfe026d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 27 Sep 2024 16:22:28 -0700 Subject: [PATCH 41/41] drm/xe: Fix memory leak when aborting binds Make sure to call xe_pt_update_ops_fini in xe_pt_update_ops_abort to free any memory the bind allocated. Caught by kmemleak when running Vulkan CTS tests on LNL. The leak seems to happen only when there's some kind of failure happening, like the lack of memory. Example output: unreferenced object 0xffff9120bdf62000 (size 8192): comm "deqp-vk", pid 115008, jiffies 4310295728 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 1b 05 f9 28 01 00 00 40 ...........(...@ 00 00 00 00 00 00 00 00 1b 15 f9 28 01 00 00 40 ...........(...@ backtrace (crc 7a56be79): [] __kmalloc_cache_noprof+0x310/0x3d0 [] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe] [] xe_pt_insert_entry+0xb9/0x140 [xe] [] xe_pt_stage_bind_entry+0x12d/0x5b0 [xe] [] xe_pt_walk_range+0xea/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_stage_bind.constprop.0+0x25f/0x580 [xe] [] bind_op_prepare+0xea/0x6e0 [xe] [] xe_pt_update_ops_prepare+0x1c8/0x440 [xe] [] ops_execute+0x143/0x850 [xe] [] vm_bind_ioctl_ops_execute+0x244/0x800 [xe] [] xe_vm_bind_ioctl+0x1877/0x2370 [xe] [] drm_ioctl_kernel+0xb3/0x110 [drm] unreferenced object 0xffff9120bdf72000 (size 8192): comm "deqp-vk", pid 115008, jiffies 4310295728 hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk backtrace (crc 23b2f0b5): [] __kmalloc_cache_noprof+0x310/0x3d0 [] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe] [] xe_pt_stage_unbind_post_descend+0xb3/0x150 [xe] [] xe_pt_walk_range+0x246/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_shared+0xc1/0x110 [xe] [] xe_pt_stage_unbind+0x9a/0xd0 [xe] [] unbind_op_prepare+0xdd/0x270 [xe] [] xe_pt_update_ops_prepare+0x106/0x440 [xe] [] ops_execute+0x143/0x850 [xe] [] vm_bind_ioctl_ops_execute+0x244/0x800 [xe] [] xe_vm_bind_ioctl+0x1877/0x2370 [xe] [] drm_ioctl_kernel+0xb3/0x110 [drm] [] drm_ioctl+0x280/0x4e0 [drm] Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2877 Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling") Signed-off-by: Matthew Brost Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20240927232228.3255246-1-matthew.brost@intel.com (cherry picked from commit 63e0695597a044c96bf369e4d8ba031291449d95) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d6353e8969f0..f27f579f4d85 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) pt_op->num_entries); } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); + xe_pt_update_ops_fini(tile, vops); }