From 4495816122cc39c428ebbc4ffd30110bb2877df9 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Tue, 26 Nov 2024 12:10:52 -0800 Subject: [PATCH 1/2] drm/xe/guc: Fix missing init value and add register order check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missing initial value for last_value. For GuC capture register definition, it is required to define 64bit register in a pair of 2 consecutive 32bit register entries, low first, then hi. Add code to check this order. Changes from prior revs: v5:- Correct cross-line comment format v4:- Fix warn on condition and remove skipping v3:- Move break inside brace v2:- Correct the fix tag pointed commit Add examples in comments for warning Add 1 missing hi condition check Fixes: ecb633646391 ("drm/xe/guc: Plumb GuC-capture into dev coredump") Signed-off-by: Zhanjun Dong Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20241126201052.1937079-1-zhanjun.dong@intel.com (cherry picked from commit 6f59fbcfa041e7d69e5e5f39d4c8cffa06fdc50b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_capture.c | 77 +++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index cc72446a5de1..d63912d28246 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -102,6 +102,7 @@ struct __guc_capture_parsed_output { * A 64 bit register define requires 2 consecutive entries, * with low dword first and hi dword the second. * 2. Register name: null for incompleted define + * 3. Incorrect order will trigger XE_WARN. */ #define COMMON_XELP_BASE_GLOBAL \ { FORCEWAKE_GT, REG_32BIT, 0, 0, "FORCEWAKE_GT"} @@ -1675,10 +1676,10 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ struct xe_devcoredump *devcoredump = &xe->devcoredump; struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot; struct gcap_reg_list_info *reginfo = NULL; - u32 last_value, i; - bool is_ext; + u32 i, last_value = 0; + bool is_ext, low32_ready = false; - if (!list || list->num_regs == 0) + if (!list || !list->list || list->num_regs == 0) return; XE_WARN_ON(!devcore_snapshot->matched_node); @@ -1701,29 +1702,75 @@ snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_ continue; value = reg->value; - if (reg_desc->data_type == REG_64BIT_LOW_DW) { + switch (reg_desc->data_type) { + case REG_64BIT_LOW_DW: last_value = value; + + /* + * A 64 bit register define requires 2 consecutive + * entries in register list, with low dword first + * and hi dword the second, like: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, "XXX_REG"}, + * + * Incorrect order will trigger XE_WARN. + * + * Possible double low here, for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + */ + XE_WARN_ON(low32_ready); + low32_ready = true; /* Low 32 bit dword saved, continue for high 32 bit */ - continue; - } else if (reg_desc->data_type == REG_64BIT_HI_DW) { + break; + + case REG_64BIT_HI_DW: { u64 value_qw = ((u64)value << 32) | last_value; + /* + * Incorrect 64bit register order. Possible missing low. + * for example: + * { XXX_REG(0), REG_32BIT, 0, 0, NULL}, + * { XXX_REG_HI(0), REG_64BIT_HI_DW, 0, 0, NULL}, + */ + XE_WARN_ON(!low32_ready); + low32_ready = false; + drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw); - continue; + break; } - if (is_ext) { - int dss, group, instance; + case REG_32BIT: + /* + * Incorrect 64bit register order. Possible missing high. + * for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * { XXX_REG(0), REG_32BIT, 0, 0, "XXX_REG"}, + */ + XE_WARN_ON(low32_ready); - group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); - instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); - dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); + if (is_ext) { + int dss, group, instance; - drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); - } else { - drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); + group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags); + instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags); + dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance); + + drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value); + } else { + drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value); + } + break; } } + + /* + * Incorrect 64bit register order. Possible missing high. + * for example: + * { XXX_REG_LO(0), REG_64BIT_LOW_DW, 0, 0, NULL}, + * } // <- Register list end + */ + XE_WARN_ON(low32_ready); } /** From 5dce85fecb87751ec94526e1ac516dd7871e2e0c Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 28 Nov 2024 13:08:23 -0800 Subject: [PATCH 2/2] drm/xe: Move the coredump registration to the worker thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding lockdep checking to the coredump code showed that there was an existing violation. The dev_coredumpm_timeout() call is used to register the dump with the base coredump subsystem. However, that makes multiple memory allocations, only some of which use the GFP_ flags passed in. So that also needs to be deferred to the worker function where it is safe to allocate with arbitrary flags. In order to not add protoypes for the callback functions, moving the _timeout call also means moving the worker thread function to later in the file. v2: Rebased after other changes to the worker function. Fixes: e799485044cb ("drm/xe: Introduce the dev_coredump infrastructure.") Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Cc: Daniel Vetter Cc: Francois Dugast Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Sumit Semwal Cc: "Christian König" Cc: intel-xe@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: # v6.8+ Signed-off-by: John Harrison Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20241128210824.3302147-3-John.C.Harrison@Intel.com (cherry picked from commit 90f51a7f4ec1004fc4ddfbc6d1f1068d85ef4771) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_devcoredump.c | 73 +++++++++++++++-------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 0b0cd6aa1d9f..f8947e7e917e 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -155,36 +155,6 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } -static void xe_devcoredump_deferred_snap_work(struct work_struct *work) -{ - struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); - struct xe_device *xe = coredump_to_xe(coredump); - unsigned int fw_ref; - - xe_pm_runtime_get(xe); - - /* keep going if fw fails as we still want to save the memory and SW data */ - fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); - xe_vm_snapshot_capture_delayed(ss->vm); - xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); - xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - - xe_pm_runtime_put(xe); - - /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; - - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); -} - static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { @@ -234,6 +204,45 @@ static void xe_devcoredump_free(void *data) "Xe device coredump has been deleted.\n"); } +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + struct xe_devcoredump *coredump = container_of(ss, typeof(*coredump), snapshot); + struct xe_device *xe = coredump_to_xe(coredump); + unsigned int fw_ref; + + /* + * NB: Despite passing a GFP_ flags parameter here, more allocations are done + * internally using GFP_KERNEL expliictly. Hence this call must be in the worker + * thread and not in the initial capture call. + */ + dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, + xe_devcoredump_read, xe_devcoredump_free, + XE_COREDUMP_TIMEOUT_JIFFIES); + + xe_pm_runtime_get(xe); + + /* keep going if fw fails as we still want to save the memory and SW data */ + fw_ref = xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); + xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); + + xe_pm_runtime_put(xe); + + /* Calculate devcoredump size */ + ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); + + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + return; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); + xe_devcoredump_snapshot_free(ss); +} + static void devcoredump_snapshot(struct xe_devcoredump *coredump, struct xe_sched_job *job) { @@ -310,10 +319,6 @@ void xe_devcoredump(struct xe_sched_job *job) drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); - - dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, - xe_devcoredump_read, xe_devcoredump_free, - XE_COREDUMP_TIMEOUT_JIFFIES); } static void xe_driver_devcoredump_fini(void *arg)