accel/ivpu: Fix locking order in ivpu_job_submit
commit ab680dc6c78aa035e944ecc8c48a1caab9f39924 upstream. Fix deadlock in job submission and abort handling. When a thread aborts currently executing jobs due to a fault, it first locks the global lock protecting submitted_jobs (#1). After the last job is destroyed, it proceeds to release the related context and locks file_priv (#2). Meanwhile, in the job submission thread, the file_priv lock (#2) is taken first, and then the submitted_jobs lock (#1) is obtained when a job is added to the submitted jobs list. CPU0 CPU1 ---- ---- (for example due to a fault) (jobs submissions keep coming) lock(&vdev->submitted_jobs_lock) #1 ivpu_jobs_abort_all() job_destroy() lock(&file_priv->lock) #2 lock(&vdev->submitted_jobs_lock) #1 file_priv_release() lock(&vdev->context_list_lock) lock(&file_priv->lock) #2 This order of locking causes a deadlock. To resolve this issue, change the order of locking in ivpu_job_submit(). Signed-off-by: Karol Wachowski <karol.wachowski@intel.com> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com> Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-12-maciej.falkowski@linux.intel.com [ This backport required small adjustments to ivpu_job_submit(), which lacks support for explicit command queue creation added in 6.15. ] Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
3e3062be7d
commit
079d2622f8
@@ -535,6 +535,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
|
|||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
mutex_lock(&vdev->submitted_jobs_lock);
|
||||||
mutex_lock(&file_priv->lock);
|
mutex_lock(&file_priv->lock);
|
||||||
|
|
||||||
cmdq = ivpu_cmdq_acquire(file_priv, job->engine_idx, priority);
|
cmdq = ivpu_cmdq_acquire(file_priv, job->engine_idx, priority);
|
||||||
@@ -542,11 +543,9 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
|
|||||||
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
|
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
|
||||||
file_priv->ctx.id, job->engine_idx, priority);
|
file_priv->ctx.id, job->engine_idx, priority);
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto err_unlock_file_priv;
|
goto err_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&vdev->submitted_jobs_lock);
|
|
||||||
|
|
||||||
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
|
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
|
||||||
ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
|
ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
|
||||||
&file_priv->job_id_next, GFP_KERNEL);
|
&file_priv->job_id_next, GFP_KERNEL);
|
||||||
@@ -554,7 +553,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
|
|||||||
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
|
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
|
||||||
file_priv->ctx.id);
|
file_priv->ctx.id);
|
||||||
ret = -EBUSY;
|
ret = -EBUSY;
|
||||||
goto err_unlock_submitted_jobs;
|
goto err_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ivpu_cmdq_push_job(cmdq, job);
|
ret = ivpu_cmdq_push_job(cmdq, job);
|
||||||
@@ -576,22 +575,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
|
|||||||
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
|
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
|
||||||
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
|
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
|
||||||
|
|
||||||
mutex_unlock(&vdev->submitted_jobs_lock);
|
|
||||||
mutex_unlock(&file_priv->lock);
|
mutex_unlock(&file_priv->lock);
|
||||||
|
|
||||||
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
|
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
|
||||||
mutex_lock(&vdev->submitted_jobs_lock);
|
|
||||||
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
|
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
|
||||||
mutex_unlock(&vdev->submitted_jobs_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&vdev->submitted_jobs_lock);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
err_erase_xa:
|
err_erase_xa:
|
||||||
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
|
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
|
||||||
err_unlock_submitted_jobs:
|
err_unlock:
|
||||||
mutex_unlock(&vdev->submitted_jobs_lock);
|
mutex_unlock(&vdev->submitted_jobs_lock);
|
||||||
err_unlock_file_priv:
|
|
||||||
mutex_unlock(&file_priv->lock);
|
mutex_unlock(&file_priv->lock);
|
||||||
ivpu_rpm_put(vdev);
|
ivpu_rpm_put(vdev);
|
||||||
return ret;
|
return ret;
|
||||||
|
|||||||
Reference in New Issue
Block a user