drm/xe: Move the coredump registration to the worker thread

Adding lockdep checking to the coredump code showed that there was an
existing violation. The dev_coredumpm_timeout() call is used to
register the dump with the base coredump subsystem. However, that
makes multiple memory allocations, only some of which use the GFP_
flags passed in. So that also needs to be deferred to the worker
function where it is safe to allocate with arbitrary flags.

In order to not add protoypes for the callback functions, moving the
_timeout call also means moving the worker thread function to later in
the file.

v2: Rebased after other changes to the worker function.

Fixes: e799485044 ("drm/xe: Introduce the dev_coredump infrastructure.")
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: intel-xe@lists.freedesktop.org
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Cc: <stable@vger.kernel.org> # v6.8+
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241128210824.3302147-3-John.C.Harrison@Intel.com
(cherry picked from commit 90f51a7f4ec1004fc4ddfbc6d1f1068d85ef4771)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
This commit is contained in:
John Harrison 2024-11-28 13:08:23 -08:00 committed by Thomas Hellström
parent 4495816122
commit 5dce85fecb

View File

@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
ss->vm = NULL; ss->vm = NULL;
} }
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
struct xe_device *xe = coredump_to_xe(coredump);
unsigned int fw_ref;
xe_pm_runtime_get(xe);
/* keep going if fw fails as we still want to save the memory and SW data */
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
xe_pm_runtime_put(xe);
/* Calculate devcoredump size */
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
if (!ss->read.buffer)
return;
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
xe_devcoredump_snapshot_free(ss);
}
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen) size_t count, void *data, size_t datalen)
{ {
@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data)
"Xe device coredump has been deleted.\n"); "Xe device coredump has been deleted.\n");
} }
static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
{
struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot);
struct xe_device *xe = coredump_to_xe(coredump);
unsigned int fw_ref;
/*
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
* internally using GFP_KERNEL expliictly. Hence this call must be in the worker
* thread and not in the initial capture call.
*/
dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
xe_devcoredump_read, xe_devcoredump_free,
XE_COREDUMP_TIMEOUT_JIFFIES);
xe_pm_runtime_get(xe);
/* keep going if fw fails as we still want to save the memory and SW data */
fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
xe_vm_snapshot_capture_delayed(ss->vm);
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
xe_pm_runtime_put(xe);
/* Calculate devcoredump size */
ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
if (!ss->read.buffer)
return;
__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
xe_devcoredump_snapshot_free(ss);
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump, static void devcoredump_snapshot(struct xe_devcoredump *coredump,
struct xe_sched_job *job) struct xe_sched_job *job)
{ {
@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job)
drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
xe->drm.primary->index); xe->drm.primary->index);
dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
xe_devcoredump_read, xe_devcoredump_free,
XE_COREDUMP_TIMEOUT_JIFFIES);
} }
static void xe_driver_devcoredump_fini(void *arg) static void xe_driver_devcoredump_fini(void *arg)