drm/xe/guc: Handle timing out of signaled jobs gracefully
Timing out of signaled jobs can happen during regular operations (e.g. an exec queue is closed immediately after its last fence signaled). The TDR can run ahead of the worker which frees jobs. Rather than running through the full TDR flow when a signaled job is found, simply free it without any debug messages. Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reported-by: José Roberto de Souza <jose.souza@intel.com> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1271 Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Tested-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240223204659.40750-1-matthew.brost@intel.com
This commit is contained in:
parent
ba6bbdc6ea
commit
e275d61c5f
@ -929,20 +929,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
|
||||
int err = -ETIME;
|
||||
int i = 0;
|
||||
|
||||
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
|
||||
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
|
||||
xe_sched_job_seqno(job), q->guc->id, q->flags);
|
||||
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
|
||||
"Kernel-submitted job timed out\n");
|
||||
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
|
||||
"VM job timed out on non-killed execqueue\n");
|
||||
/*
|
||||
* TDR has fired before free job worker. Common if exec queue
|
||||
* immediately closed after last fence signaled.
|
||||
*/
|
||||
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
|
||||
guc_exec_queue_free_job(drm_job);
|
||||
|
||||
simple_error_capture(q);
|
||||
xe_devcoredump(job);
|
||||
} else {
|
||||
drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
|
||||
xe_sched_job_seqno(job), q->guc->id, q->flags);
|
||||
return DRM_GPU_SCHED_STAT_NOMINAL;
|
||||
}
|
||||
|
||||
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
|
||||
xe_sched_job_seqno(job), q->guc->id, q->flags);
|
||||
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
|
||||
"Kernel-submitted job timed out\n");
|
||||
xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
|
||||
"VM job timed out on non-killed execqueue\n");
|
||||
|
||||
simple_error_capture(q);
|
||||
xe_devcoredump(job);
|
||||
|
||||
trace_xe_sched_job_timedout(job);
|
||||
|
||||
/* Kill the run_job entry point */
|
||||
|
Loading…
Reference in New Issue
Block a user