amd-drm-next-6.12-2024-09-13:

amdgpu:
 - GPUVM sync fixes
 - kdoc fixes
 - Misc spelling mistakes
 - Add some raven GFXOFF quirks
 - Use clamp helper
 - DC fixes
 - JPEG fixes
 - Process isolation fix
 - Queue reset fix
 - W=1 cleanup
 - SMU14 fixes
 - JPEG fixes
 
 amdkfd:
 - Fetch cacheline info from IP discovery
 - Queue reset fix
 - RAS fix
 - Document SVM events
 - CRIU fixes
 - Race fix in dma-buf handling
 
 drm:
 - dma-buf fd race fixes
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZuQ+9wAKCRC93/aFa7yZ
 2CNzAQD/LpAjMlHlHK2vwR7LkGhC+sy06a44zD1M+hf5HwgVsQD8D5CVt5WiNAtT
 ULEzeA0IfTopJRI8aLhAaOOD7ln8igg=
 =83EZ
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-next-6.12-2024-09-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.12-2024-09-13:

amdgpu:
- GPUVM sync fixes
- kdoc fixes
- Misc spelling mistakes
- Add some raven GFXOFF quirks
- Use clamp helper
- DC fixes
- JPEG fixes
- Process isolation fix
- Queue reset fix
- W=1 cleanup
- SMU14 fixes
- JPEG fixes

amdkfd:
- Fetch cacheline info from IP discovery
- Queue reset fix
- RAS fix
- Document SVM events
- CRIU fixes
- Race fix in dma-buf handling

drm:
- dma-buf fd race fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240913134139.2861073-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2024-09-17 01:05:31 +10:00
commit 26df39de93
50 changed files with 679 additions and 367 deletions

View File

@ -39,23 +39,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
subdir-ccflags-y := -Wextra
subdir-ccflags-y += -Wunused
subdir-ccflags-y += -Wmissing-prototypes
subdir-ccflags-y += -Wmissing-declarations
subdir-ccflags-y += -Wmissing-include-dirs
subdir-ccflags-y += -Wold-style-definition
subdir-ccflags-y += -Wmissing-format-attribute
# Need this to avoid recursive variable evaluation issues
cond-flags := $(call cc-option, -Wunused-but-set-variable) \
$(call cc-option, -Wunused-const-variable) \
$(call cc-option, -Wstringop-truncation) \
$(call cc-option, -Wpacked-not-aligned)
subdir-ccflags-y += $(cond-flags)
subdir-ccflags-y += -Wno-unused-parameter
subdir-ccflags-y += -Wno-type-limits
subdir-ccflags-y += -Wno-sign-compare
subdir-ccflags-y += -Wno-missing-field-initializers
# Locally disable W=1 warnings enabled in drm subsystem Makefile
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror

View File

@ -237,6 +237,7 @@ extern int sched_policy;
extern bool debug_evictions;
extern bool no_system_mem_limit;
extern int halt_if_hws_hang;
extern uint amdgpu_svm_default_granularity;
#else
static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */

View File

@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"

View File

@ -25,7 +25,6 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/fdtable.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
@ -818,18 +817,13 @@ static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
if (!mem->dmabuf) {
struct amdgpu_device *bo_adev;
struct dma_buf *dmabuf;
int r, fd;
bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
mem->gem_handle,
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DRM_RDWR : 0, &fd);
if (r)
return r;
dmabuf = dma_buf_get(fd);
close_fd(fd);
if (WARN_ON_ONCE(IS_ERR(dmabuf)))
DRM_RDWR : 0);
if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
mem->dmabuf = dmabuf;
}

View File

@ -169,6 +169,16 @@ uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu;
char *amdgpu_virtual_display;
bool enforce_isolation;
/* Specifies the default granularity for SVM, used in buffer
* migration and restoration of backing memory when handling
* recoverable page faults.
*
* The value is given as log(numPages(buffer)); for a 2 MiB
* buffer it computes to be 9
*/
uint amdgpu_svm_default_granularity = 9;
/*
* OverDrive(bit 14) disabled by default
* GFX DCS(bit 19) disabled by default
@ -320,6 +330,13 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
* DOC: svm_default_granularity (uint)
* Used in buffer migration and handling of recoverable page faults
*/
MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
/**
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.

View File

@ -1397,14 +1397,23 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
long timeout = msecs_to_jiffies(1000);
struct dma_fence *f = NULL;
struct drm_gpu_scheduler *sched = &ring->sched;
struct drm_sched_entity entity;
struct dma_fence *f;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
int i, r;
r = amdgpu_job_alloc_with_ib(adev, NULL, NULL,
64, AMDGPU_IB_POOL_DIRECT,
/* Initialize the scheduler entity */
r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
&sched, 1, NULL);
if (r) {
dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
goto err;
}
r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
64, 0,
&job);
if (r)
goto err;
@ -1416,24 +1425,18 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
ib->ptr[i] = ring->funcs->nop;
ib->length_dw = ring->funcs->align_mask + 1;
r = amdgpu_job_submit_direct(job, ring, &f);
f = amdgpu_job_submit(job);
r = dma_fence_wait(f, false);
if (r)
goto err_free;
goto err;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0)
r = -ETIMEDOUT;
else if (r > 0)
r = 0;
amdgpu_ib_free(adev, ib, f);
dma_fence_put(f);
/* Clean up the scheduler entity */
drm_sched_entity_destroy(&entity);
return 0;
err_free:
amdgpu_job_free(job);
amdgpu_ib_free(adev, ib, f);
err:
return r;
}

View File

@ -832,6 +832,7 @@ int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
struct mes_reset_queue_input queue_input;
int r;
queue_input.queue_type = queue_type;
queue_input.use_mmio = true;
queue_input.me_id = me_id;
queue_input.pipe_id = pipe_id;

View File

@ -94,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n
ref_div_max = min(128 / post_div, ref_div_max);
/* get matching reference and feedback divider */
*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
*ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
/* limit fb divider to its maximum */

View File

@ -22,7 +22,6 @@
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/pid.h>

View File

@ -844,7 +844,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
params.vm = vm;
params.immediate = immediate;
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
r = vm->update_funcs->prepare(&params, NULL);
if (r)
goto error;
@ -908,10 +908,12 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
{
struct amdgpu_vm *vm = params->vm;
if (!fence || !*fence)
return;
tlb_cb->vm = vm;
if (!fence || !*fence) {
amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
return;
}
if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
amdgpu_vm_tlb_seq_cb)) {
dma_fence_put(vm->last_tlb_flush);
@ -939,7 +941,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
* @unlocked: unlocked invalidation during MM callback
* @flush_tlb: trigger tlb invalidation after update completed
* @allow_override: change MTYPE for local NUMA nodes
* @resv: fences we need to sync to
* @sync: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
@ -955,16 +957,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
* 0 for success, negative erro code for failure.
*/
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
bool immediate, bool unlocked, bool flush_tlb,
bool allow_override, struct amdgpu_sync *sync,
uint64_t start, uint64_t last, uint64_t flags,
uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_vm_update_params params;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@ -997,14 +999,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.allow_override = allow_override;
INIT_LIST_HEAD(&params.tlb_flush_waitlist);
/* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping.
*/
if (!(flags & AMDGPU_PTE_VALID))
sync_mode = AMDGPU_SYNC_EQ_OWNER;
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
@ -1019,7 +1013,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
dma_fence_put(tmp);
}
r = vm->update_funcs->prepare(&params, resv, sync_mode);
r = vm->update_funcs->prepare(&params, sync);
if (r)
goto error_free;
@ -1161,23 +1155,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
struct dma_fence **last_update;
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
struct dma_fence **last_update;
struct amdgpu_sync sync;
bool flush_tlb = clear;
bool uncached;
struct dma_resv *resv;
uint64_t vram_base;
uint64_t flags;
bool uncached;
int r;
amdgpu_sync_create(&sync);
if (clear || !bo) {
mem = NULL;
resv = vm->root.bo->tbo.base.resv;
/* Implicitly sync to command submissions in the same VM before
* unmapping.
*/
r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
AMDGPU_SYNC_EQ_OWNER, vm);
if (r)
goto error_free;
} else {
struct drm_gem_object *obj = &bo->tbo.base;
resv = bo->tbo.base.resv;
if (obj->import_attach && bo_va->is_xgmi) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
struct drm_gem_object *gobj = dma_buf->priv;
@ -1191,6 +1192,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
if (mem && (mem->mem_type == TTM_PL_TT ||
mem->mem_type == AMDGPU_PL_PREEMPT))
pages_addr = bo->tbo.ttm->dma_address;
/* Implicitly sync to moving fences before mapping anything */
r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
AMDGPU_SYNC_EXPLICIT, vm);
if (r)
goto error_free;
}
if (bo) {
@ -1240,12 +1247,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
!uncached, resv, mapping->start, mapping->last,
update_flags, mapping->offset,
vram_base, mem, pages_addr,
last_update);
!uncached, &sync, mapping->start,
mapping->last, update_flags,
mapping->offset, vram_base, mem,
pages_addr, last_update);
if (r)
return r;
goto error_free;
}
/* If the BO is not in its preferred location add it back to
@ -1273,7 +1280,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_mapping(mapping);
}
return 0;
error_free:
amdgpu_sync_free(&sync);
return r;
}
/**
@ -1420,25 +1429,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence)
{
struct dma_resv *resv = vm->root.bo->tbo.base.resv;
struct amdgpu_bo_va_mapping *mapping;
uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
struct amdgpu_sync sync;
int r;
/*
* Implicitly sync to command submissions in the same VM before
* unmapping.
*/
amdgpu_sync_create(&sync);
r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
AMDGPU_SYNC_EQ_OWNER, vm);
if (r)
goto error_free;
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
resv, mapping->start, mapping->last,
init_pte_value, 0, 0, NULL, NULL,
&f);
&sync, mapping->start, mapping->last,
0, 0, 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
return r;
goto error_free;
}
}
@ -1449,7 +1467,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
dma_fence_put(f);
}
return 0;
error_free:
amdgpu_sync_free(&sync);
return r;
}
@ -2224,7 +2244,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
(1 << 30) - 1) >> 30;
vm_size = roundup_pow_of_two(
min(max(phys_ram_gb * 3, min_vm_size), max_size));
clamp(phys_ram_gb * 3, min_vm_size, max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
@ -2403,7 +2423,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int32_t xcp_id)
{
struct amdgpu_ip_block *ip_block;
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
int r, i;
@ -2435,11 +2454,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
/* use CPU for page table update if SDMA is unavailable */
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
if (!ip_block || ip_block->status.valid == false)
vm->use_cpu_for_update = true;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
@ -2770,6 +2784,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
* @ts: Timestamp of the fault
* @vmid: VMID, only used for GFX 9.4.3.
* @node_id: Node_id received in IH cookie. Only applicable for
* GFX 9.4.3.

View File

@ -304,8 +304,8 @@ struct amdgpu_vm_update_params {
struct amdgpu_vm_update_funcs {
int (*map_table)(struct amdgpu_bo_vm *bo);
int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
enum amdgpu_sync_mode sync_mode);
int (*prepare)(struct amdgpu_vm_update_params *p,
struct amdgpu_sync *sync);
int (*update)(struct amdgpu_vm_update_params *p,
struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr, uint64_t flags);
@ -505,9 +505,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
bool immediate, bool unlocked, bool flush_tlb,
bool allow_override, struct amdgpu_sync *sync,
uint64_t start, uint64_t last, uint64_t flags,
uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,

View File

@ -39,20 +39,18 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
*
* @p: see amdgpu_vm_update_params definition
* @resv: reservation object with embedded fence
* @sync_mode: synchronization mode
* @sync: sync obj with fences to wait on
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
struct dma_resv *resv,
enum amdgpu_sync_mode sync_mode)
struct amdgpu_sync *sync)
{
if (!resv)
if (!sync)
return 0;
return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
return amdgpu_sync_wait(sync, true);
}
/**

View File

@ -403,7 +403,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.vm = vm;
params.immediate = immediate;
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
r = vm->update_funcs->prepare(&params, NULL);
if (r)
goto exit;

View File

@ -77,32 +77,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
* amdgpu_vm_sdma_prepare - prepare SDMA command submission
*
* @p: see amdgpu_vm_update_params definition
* @resv: reservation object with embedded fence
* @sync_mode: synchronization mode
* @sync: amdgpu_sync object with fences to wait for
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
struct dma_resv *resv,
enum amdgpu_sync_mode sync_mode)
struct amdgpu_sync *sync)
{
struct amdgpu_sync sync;
int r;
r = amdgpu_vm_sdma_alloc_job(p, 0);
if (r)
return r;
if (!resv)
if (!sync)
return 0;
amdgpu_sync_create(&sync);
r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
if (!r)
r = amdgpu_sync_push_to_job(&sync, p->job);
amdgpu_sync_free(&sync);
r = amdgpu_sync_push_to_job(sync, p->job);
if (r) {
p->num_dw_left = 0;
amdgpu_job_free(p->job);

View File

@ -1345,6 +1345,10 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
/* https://bbs.openkylin.top/t/topic/171497 */
{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
/* HP 705G4 DM with R5 2400G */
{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
{ 0, 0, 0, 0, 0 },
};

View File

@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
if (ring->funcs->parse_cs)
amdgpu_ring_write(ring, 0);
else
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
.parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
/**
* jpeg_v1_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
u32 i, reg, res, cond, type;
int ret = 0;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
return -EINVAL;
if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
continue;
switch (type) {
case PACKETJ_TYPE0:
if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
reg != JPEG_V1_REG_CTX_INDEX &&
reg != JPEG_V1_REG_CTX_DATA) {
ret = -EINVAL;
}
break;
case PACKETJ_TYPE1:
if (reg != JPEG_V1_REG_CTX_DATA)
ret = -EINVAL;
break;
case PACKETJ_TYPE3:
if (reg != JPEG_V1_REG_SOFT_RESET)
ret = -EINVAL;
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] != CP_PACKETJ_NOP)
ret = -EINVAL;
break;
default:
ret = -EINVAL;
}
if (ret) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
break;
}
}
return ret;
}

View File

@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle);
void jpeg_v1_0_sw_fini(void *handle);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
#define JPEG_V1_REG_RANGE_START 0x8000
#define JPEG_V1_REG_RANGE_END 0x803f
#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
#define JPEG_V1_REG_CTX_INDEX 0x8328
#define JPEG_V1_REG_CTX_DATA 0x8329
#define JPEG_V1_REG_SOFT_RESET 0x83a0
#endif /*__JPEG_V1_0_H__*/

View File

@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
@ -538,7 +539,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
if (ring->funcs->parse_cs)
amdgpu_ring_write(ring, 0);
else
amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.rev = 0,
.funcs = &jpeg_v2_0_ip_funcs,
};
/**
* jpeg_v2_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
u32 i, reg, res, cond, type;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res) /* only support 0 at the moment */
return -EINVAL;
switch (type) {
case PACKETJ_TYPE0:
if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START ||
reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE3:
if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START ||
reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] == CP_PACKETJ_NOP)
continue;
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
default:
dev_err(adev->dev, "Unknown packet type %d !\n", type);
return -EINVAL;
}
}
return 0;
}

View File

@ -45,6 +45,9 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
#define JPEG_REG_RANGE_START 0x4000
#define JPEG_REG_RANGE_END 0x41c2
void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr);
void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;

View File

@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

View File

@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

View File

@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

View File

@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block {
};
extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
#endif /* __JPEG_V4_0_H__ */

View File

@ -23,9 +23,9 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"
@ -59,6 +59,12 @@ static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG7_DECODE
};
static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
{
return amdgpu_sriov_vf(adev) ||
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4));
}
/**
* jpeg_v4_0_3_early_init - set function pointers
*
@ -734,32 +740,20 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
amdgpu_ring_write(ring, 0);
if (ring->adev->jpeg.inst[ring->me].aid_id) {
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x4);
} else {
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
}
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x3fbc);
if (ring->adev->jpeg.inst[ring->me].aid_id) {
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x0);
} else {
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
}
amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x1);
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
amdgpu_ring_write(ring, 0);
}
@ -834,8 +828,8 @@ void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
uint32_t reg_offset;
/* For VF, only local offsets should be used */
if (amdgpu_sriov_vf(ring->adev))
/* Use normalized offsets if required */
if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_JPEG_REG_OFFSET(reg);
reg_offset = (reg << 2);
@ -881,8 +875,8 @@ void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint
{
uint32_t reg_offset;
/* For VF, only local offsets should be used */
if (amdgpu_sriov_vf(ring->adev))
/* Use normalized offsets if required */
if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_JPEG_REG_OFFSET(reg);
reg_offset = (reg << 2);
@ -1089,7 +1083,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
.parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@ -1254,56 +1248,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
adev->jpeg.ras = &jpeg_v4_0_3_ras;
}
/**
* jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
*
* @parser: Command submission parser context
* @job: the job to parse
* @ib: the IB to parse
*
* Parse the command stream, return -EINVAL for invalid packet,
* 0 otherwise
*/
int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
uint32_t i, reg, res, cond, type;
struct amdgpu_device *adev = parser->adev;
for (i = 0; i < ib->length_dw ; i += 2) {
reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
res = CP_PACKETJ_GET_RES(ib->ptr[i]);
cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
if (res) /* only support 0 at the moment */
return -EINVAL;
switch (type) {
case PACKETJ_TYPE0:
if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE3:
if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
}
break;
case PACKETJ_TYPE6:
if (ib->ptr[i] == CP_PACKETJ_NOP)
continue;
dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
return -EINVAL;
default:
dev_err(adev->dev, "Unknown packet type %d !\n", type);
return -EINVAL;
}
}
return 0;
}

View File

@ -46,9 +46,6 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
#define JPEG_REG_RANGE_START 0x4000
#define JPEG_REG_RANGE_END 0x41c2
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
struct amdgpu_job *job,
struct amdgpu_ib *ib);
#endif /* __JPEG_V4_0_3_H__ */

View File

@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

View File

@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "vcn/vcn_5_0_0_offset.h"
@ -646,7 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
.parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
.parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +

View File

@ -415,7 +415,7 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
/* wait till dequeue take effects */
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
break;
break;
udelay(1);
}
if (i >= adev->usec_timeout) {

View File

@ -36,7 +36,6 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
}
static int criu_get_prime_handle(struct kgd_mem *mem,
int flags, u32 *shared_fd)
int flags, u32 *shared_fd,
struct file **file)
{
struct dma_buf *dmabuf;
int ret;
@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret;
}
ret = dma_buf_fd(dmabuf, flags);
ret = get_unused_fd_flags(flags);
if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf;
}
*shared_fd = ret;
*file = dmabuf->file;
return 0;
out_free_dmabuf:
@ -1860,6 +1861,25 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret;
}
static void commit_files(struct file **files,
struct kfd_criu_bo_bucket *bo_buckets,
unsigned int count,
int err)
{
while (count--) {
struct file *file = files[count];
if (!file)
continue;
if (err) {
fput(file);
put_unused_fd(bo_buckets[count].dmabuf_fd);
} else {
fd_install(bo_buckets[count].dmabuf_fd, file);
}
}
}
static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos,
uint8_t __user *user_bos,
@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs;
struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id;
void *mem;
@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
if (!files) {
ret = -ENOMEM;
goto exit;
}
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo;
@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
&bo_bucket->dmabuf_fd);
&bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret)
goto exit;
} else {
@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
exit:
while (ret && bo_index--) {
if (bo_buckets[bo_index].alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[bo_index].dmabuf_fd);
}
commit_files(files, bo_buckets, bo_index, ret);
kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket,
struct kfd_criu_bo_priv_data *bo_priv)
struct kfd_criu_bo_priv_data *bo_priv,
struct file **file)
{
struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem;
@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
&bo_bucket->dmabuf_fd);
&bo_bucket->dmabuf_fd, file);
if (ret)
return ret;
} else {
@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;
struct file **files = NULL;
int ret = 0;
uint32_t i = 0;
@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets)
return -ENOMEM;
files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
if (!files) {
ret = -ENOMEM;
goto exit;
}
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets));
if (ret) {
@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit;
@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT;
exit:
while (ret && i--) {
if (bo_buckets[i].alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[i].dmabuf_fd);
}
commit_files(files, bo_buckets, i, ret);
kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;

View File

@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
i++;
}
/* Scalar L1 Data Cache per SQC */
@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
i++;
}
/* GL1 Data Cache per SA */
@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
i++;
}
/* L3 Data Cache per GPU */
@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
i++;
}
return i;

View File

@ -2407,10 +2407,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
list_del(&q->list);
qpd->queue_count--;
if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
q->properties.is_active = false;
if (!dqm->dev->kfd->shared_resources.enable_mes) {
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@ -2421,6 +2420,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = remove_queue_mes(dqm, q, qpd);
}
}
list_del(&q->list);
qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's

View File

@ -167,11 +167,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
block = AMDGPU_RAS_BLOCK__GFX;
if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
/* driver mode-2 for gfx poison is only supported by
* pmfw 0x00557300 and onwards */
if (dev->adev->pm.fw_version < 0x00557300)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
} else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
/* driver mode-2 for gfx poison is only supported by
* pmfw 0x05550C00 and onwards */
if (dev->adev->pm.fw_version < 0x05550C00)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
break;
case SOC15_IH_CLIENTID_VMC:
case SOC15_IH_CLIENTID_VMC1:
@ -184,11 +196,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
block = AMDGPU_RAS_BLOCK__SDMA;
if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
/* driver mode-2 for gfx poison is only supported by
* pmfw 0x00557300 and onwards */
if (dev->adev->pm.fw_version < 0x00557300)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
} else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
/* driver mode-2 for gfx poison is only supported by
* pmfw 0x05550C00 and onwards */
if (dev->adev->pm.fw_version < 0x05550C00)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
break;
default:
dev_warn(dev->adev->dev,

View File

@ -868,6 +868,12 @@ struct svm_range_list {
struct task_struct *faulting_task;
/* check point ts decides if page fault recovery need be dropped */
uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
/* Default granularity to use in buffer migration
* and restoration of backing memory while handling
* recoverable page faults
*/
uint8_t default_granularity;
};
/* Process data */

View File

@ -1046,6 +1046,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
kfree(q_extra_data);
return ret;
}

View File

@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
amdgpu_reset_get_desc(reset_context, reset_cause,
sizeof(reset_cause));
kfd_smi_event_add(0, dev, event, "%x %s\n",
dev->reset_seq_num,
reset_cause);
kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->pid)
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
task_info->pid, task_info->task_name);
kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
task_info->pid, task_info->task_name));
amdgpu_vm_put_task_info(task_info);
}
}
@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
ktime_t ts)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
"%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
address, node->id, write_fault ? 'W' : 'R');
KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
address, node->id, write_fault ? 'W' : 'R'));
}
void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
"%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
pid, address, node->id, migration ? 'M' : 'U');
KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
pid, address, node->id, migration ? 'M' : 'U'));
}
void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
KFD_EVENT_FMT_MIGRATE_START(
ktime_get_boottime_ns(), pid, start, end - start,
from, to, prefetch_loc, preferred_loc, trigger);
from, to, prefetch_loc, preferred_loc, trigger));
}
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
uint32_t from, uint32_t to, uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
"%lld -%d @%lx(%lx) %x->%x %d\n",
KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
from, to, trigger);
from, to, trigger));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
"%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
node->id, trigger);
KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
node->id, trigger));
}
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
"%lld -%d %x\n", ktime_get_boottime_ns(), pid,
node->id);
KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
node->id, 0));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
"%lld -%d %x %c\n", ktime_get_boottime_ns(),
p->lead_thread->pid, pdd->dev->id, 'R');
KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
p->lead_thread->pid, pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
"%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
pid, address, last - address + 1, node->id, trigger);
KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
pid, address, last - address + 1, node->id, trigger));
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)

View File

@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
}
static void
svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
uint8_t *granularity, uint32_t *flags)
svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
int32_t *prefetch_loc, uint8_t *granularity,
uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*granularity = 9;
*granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
svm_range_set_default_attributes(&prange->preferred_loc,
svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@ -2703,9 +2704,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8));
(unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
(unsigned long)ALIGN(addr + 1, 2UL << 8));
(unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
@ -3249,6 +3251,12 @@ int svm_range_list_init(struct kfd_process *p)
if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
/* Value of default granularity cannot exceed 0x1B, the
* number of pages supported by a 4-level paging table
*/
svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
return 0;
}
@ -3776,7 +3784,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
svm_range_set_default_attributes(&location, &prefetch_loc,
svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)

View File

@ -10610,7 +10610,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
* TODO: We can likely skip bandwidth validation if the only thing that
* changed about the plane was it'z z-ordering.
*/
if (new_crtc_state->zpos_changed)
if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos)
return true;
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
@ -11458,6 +11458,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
drm_dbg(dev, "Failed to determine cursor mode\n");
goto fail;
}
/*
* If overlay cursor is needed, DC cannot go through the
* native cursor update path. All enabled planes on the CRTC
* need to be added for DC to not disable a plane by mistake
*/
if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
ret = drm_atomic_add_affected_planes(state, crtc);
if (ret)
goto fail;
}
}
/* Remove exiting planes if they are modified */

View File

@ -1325,7 +1325,7 @@ static bool is_dsc_need_re_compute(
if (new_crtc_state->enable && new_crtc_state->active) {
if (new_crtc_state->mode_changed || new_crtc_state->active_changed ||
new_crtc_state->connectors_changed) {
DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompte required."
DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
"stream 0x%p in new dc_state\n",
__func__, __LINE__, stream);
is_dsc_need_re_compute = true;

View File

@ -1748,10 +1748,6 @@ void dccg35_init(struct dccg *dccg)
dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false);
}
if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp)
for (otg_inst = 0; otg_inst < 4; otg_inst++)
dccg35_set_dppclk_root_clock_gating(dccg, otg_inst, 0);
/*
dccg35_enable_global_fgcg_rep(
dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits
@ -1932,47 +1928,32 @@ static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst,
}
/*get other front end connected to this backend*/
static uint8_t dccg35_get_other_enabled_symclk_fe(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst)
{
uint8_t num_enabled_symclk_fe = 0;
uint32_t be_clk_en = 0, fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
switch (link_enc_inst) {
case 0:
REG_GET_3(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, &be_clk_en,
SYMCLKA_FE_EN, &fe_clk_en[0],
SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
break;
case 1:
REG_GET_3(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, &be_clk_en,
SYMCLKB_FE_EN, &fe_clk_en[1],
SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
break;
case 2:
REG_GET_3(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, &be_clk_en,
SYMCLKC_FE_EN, &fe_clk_en[2],
SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
break;
case 3:
REG_GET_3(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, &be_clk_en,
SYMCLKD_FE_EN, &fe_clk_en[3],
SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
break;
case 4:
REG_GET_3(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, &be_clk_en,
SYMCLKE_FE_EN, &fe_clk_en[4],
SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
break;
}
if (be_clk_en) {
/* for DPMST, this backend could be used by multiple front end.
only disable the backend if this stream_enc_ins is the last active stream enc connected to this back_end*/
uint8_t i;
for (i = 0; i != link_enc_inst && i < ARRAY_SIZE(fe_clk_en); i++) {
if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
num_enabled_symclk_fe++;
}
REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0],
SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1],
SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2],
SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3],
SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4],
SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
uint8_t i;
for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) {
if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
num_enabled_symclk_fe++;
}
return num_enabled_symclk_fe;
}
@ -2020,9 +2001,9 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst
break;
}
/*check other enabled symclk fe */
num_enabled_symclk_fe = dccg35_get_other_enabled_symclk_fe(dccg, stream_enc_inst, link_enc_inst);
/*only turn off backend clk if other front end attachecd to this backend are all off,
/*check other enabled symclk fe connected to this be */
num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst);
/*only turn off backend clk if other front end attached to this backend are all off,
for mst, only turn off the backend if this is the last front end*/
if (num_enabled_symclk_fe == 0) {
switch (link_enc_inst) {
@ -2351,6 +2332,14 @@ static void dccg35_disable_symclk_se_cb(
/* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */
}
void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating)
{
if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) {
dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating);
}
}
static const struct dccg_funcs dccg35_funcs_new = {
.update_dpp_dto = dccg35_update_dpp_dto_cb,
.dpp_root_clock_control = dccg35_dpp_root_clock_control_cb,
@ -2411,7 +2400,7 @@ static const struct dccg_funcs dccg35_funcs = {
.enable_symclk_se = dccg35_enable_symclk_se,
.disable_symclk_se = dccg35_disable_symclk_se,
.set_dtbclk_p_src = dccg35_set_dtbclk_p_src,
.dccg_root_gate_disable_control = dccg35_root_gate_disable_control,
};
struct dccg *dccg35_create(

View File

@ -241,6 +241,7 @@ struct dccg *dccg35_create(
void dccg35_init(struct dccg *dccg);
void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value);
void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
#endif //__DCN35_DCCG_H__

View File

@ -951,6 +951,8 @@ static void dpp401_dscl_set_isharp_filter(
*
* @dpp_base: High level DPP struct
* @scl_data: scalaer_data info
* @program_isharp_1dlut: flag to program isharp 1D LUT
* @bs_coeffs_updated: Blur and Scale Coefficients update flag
*
* This is the primary function to program isharp
*

View File

@ -3212,15 +3212,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
pipe_ctx[i]->stream_res.tg->funcs->set_drr(
pipe_ctx[i]->stream_res.tg, &params);
/* dc_state_destruct() might null the stream resources, so fetch tg
* here first to avoid a race condition. The lifetime of the pointee
* itself (the timing_generator object) is not a problem here.
*/
struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
if ((tg != NULL) && tg->funcs) {
if (tg->funcs->set_drr)
tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx[i]->stream_res.tg,
event_triggers, num_frames);
if (tg->funcs->set_static_screen_control)
tg->funcs->set_static_screen_control(
tg, event_triggers, num_frames);
}
}
}

View File

@ -240,6 +240,10 @@ void dcn35_init_hw(struct dc *dc)
dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
}
if (res_pool->dccg->funcs->dccg_root_gate_disable_control) {
for (i = 0; i < res_pool->pipe_count; i++)
res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0);
}
for (i = 0; i < res_pool->audio_count; i++) {
struct audio *audio = res_pool->audios[i];
@ -1414,7 +1418,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
for (i = 0; i < num_pipes; i++) {
if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
/* dc_state_destruct() might null the stream resources, so fetch tg
* here first to avoid a race condition. The lifetime of the pointee
* itself (the timing_generator object) is not a problem here.
*/
struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
if ((tg != NULL) && tg->funcs) {
if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) {
struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
@ -1426,14 +1436,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
num_frames = 2 * (frame_rate % 60);
}
}
if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
pipe_ctx[i]->stream_res.tg->funcs->set_drr(
pipe_ctx[i]->stream_res.tg, &params);
if (tg->funcs->set_drr)
tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx[i]->stream_res.tg,
event_triggers, num_frames);
if (tg->funcs->set_static_screen_control)
tg->funcs->set_static_screen_control(
tg, event_triggers, num_frames);
}
}
}

View File

@ -213,6 +213,7 @@ struct dccg_funcs {
uint32_t otg_inst);
void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
};
#endif //__DAL_DCCG_H__

View File

@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4
uint16_t supporteddevices;
uint8_t number_of_path;
uint8_t reserved;
struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
};
struct display_object_info_table_v1_5 {
@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 {
uint8_t reserved;
// the real number of this included in the structure is calculated by using the
// (whole structure size - the header size- number_of_path)/size of atom_display_object_path
struct atom_display_object_path_v3 display_path[8];
struct atom_display_object_path_v3 display_path[];
};
/*

View File

@ -439,7 +439,16 @@ enum smu_clk_type {
__SMU_DUMMY_MAP(BACO_CG), \
__SMU_DUMMY_MAP(SOC_CG), \
__SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \
__SMU_DUMMY_MAP(WHISPER_MODE),
__SMU_DUMMY_MAP(WHISPER_MODE), \
__SMU_DUMMY_MAP(EDC_PWRBRK), \
__SMU_DUMMY_MAP(SOC_EDC_XVMIN), \
__SMU_DUMMY_MAP(GFX_PSM_DIDT), \
__SMU_DUMMY_MAP(APT_ALL_ENABLE), \
__SMU_DUMMY_MAP(APT_SQ_THROTTLE), \
__SMU_DUMMY_MAP(APT_PF_DCS), \
__SMU_DUMMY_MAP(GFX_EDC_XVMIN), \
__SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \
__SMU_DUMMY_MAP(FAN_ABNORMAL),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT

View File

@ -199,6 +199,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(MEM_TEMP_READ),
FEA_MAP(ATHUB_MMHUB_PG),
FEA_MAP(SOC_PCC),
FEA_MAP(EDC_PWRBRK),
FEA_MAP(SOC_EDC_XVMIN),
FEA_MAP(GFX_PSM_DIDT),
FEA_MAP(APT_ALL_ENABLE),
FEA_MAP(APT_SQ_THROTTLE),
FEA_MAP(APT_PF_DCS),
FEA_MAP(GFX_EDC_XVMIN),
FEA_MAP(GFX_DIDT_XVMIN),
FEA_MAP(FAN_ABNORMAL),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
@ -687,6 +696,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu)
pcie_table->clk_freq[pcie_table->num_of_link_levels] =
skutable->LclkFreq[link_level];
pcie_table->num_of_link_levels++;
if (link_level == 0)
link_level++;
}
/* dcefclk dpm table setup */

View File

@ -410,22 +410,30 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
}
/**
* drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
* drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers
* @dev: dev to export the buffer from
* @file_priv: drm file-private structure
* @handle: buffer handle to export
* @flags: flags like DRM_CLOEXEC
* @prime_fd: pointer to storage for the fd id of the create dma-buf
*
* This is the PRIME export function which must be used mandatorily by GEM
* drivers to ensure correct lifetime management of the underlying GEM object.
* The actual exporting from GEM object to a dma-buf is done through the
* &drm_gem_object_funcs.export callback.
*
* Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it
* has created, without attaching it to any file descriptors. The difference
* between those two is similar to that between anon_inode_getfile() and
* anon_inode_getfd(); insertion into descriptor table is something you
* can not revert if any cleanup is needed, so the descriptor-returning
* variants should only be used when you are past the last failure exit
* and the only thing left is passing the new file descriptor to userland.
* When all you need is the object itself or when you need to do something
* else that might fail, use that one instead.
*/
int drm_gem_prime_handle_to_fd(struct drm_device *dev,
struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle,
uint32_t flags,
int *prime_fd)
uint32_t flags)
{
struct drm_gem_object *obj;
int ret = 0;
@ -434,14 +442,14 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev,
mutex_lock(&file_priv->prime.lock);
obj = drm_gem_object_lookup(file_priv, handle);
if (!obj) {
ret = -ENOENT;
dmabuf = ERR_PTR(-ENOENT);
goto out_unlock;
}
dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle);
if (dmabuf) {
get_dma_buf(dmabuf);
goto out_have_handle;
goto out;
}
mutex_lock(&dev->object_name_lock);
@ -463,7 +471,6 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev,
/* normally the created dma-buf takes ownership of the ref,
* but if that fails then drop the ref
*/
ret = PTR_ERR(dmabuf);
mutex_unlock(&dev->object_name_lock);
goto out;
}
@ -478,34 +485,51 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev,
ret = drm_prime_add_buf_handle(&file_priv->prime,
dmabuf, handle);
mutex_unlock(&dev->object_name_lock);
if (ret)
goto fail_put_dmabuf;
out_have_handle:
ret = dma_buf_fd(dmabuf, flags);
/*
* We must _not_ remove the buffer from the handle cache since the newly
* created dma buf is already linked in the global obj->dma_buf pointer,
* and that is invariant as long as a userspace gem handle exists.
* Closing the handle will clean out the cache anyway, so we don't leak.
*/
if (ret < 0) {
goto fail_put_dmabuf;
} else {
*prime_fd = ret;
ret = 0;
if (ret) {
dma_buf_put(dmabuf);
dmabuf = ERR_PTR(ret);
}
goto out;
fail_put_dmabuf:
dma_buf_put(dmabuf);
out:
drm_gem_object_put(obj);
out_unlock:
mutex_unlock(&file_priv->prime.lock);
return dmabuf;
}
EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf);
return ret;
/**
* drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
* @dev: dev to export the buffer from
* @file_priv: drm file-private structure
* @handle: buffer handle to export
* @flags: flags like DRM_CLOEXEC
* @prime_fd: pointer to storage for the fd id of the create dma-buf
*
* This is the PRIME export function which must be used mandatorily by GEM
* drivers to ensure correct lifetime management of the underlying GEM object.
* The actual exporting from GEM object to a dma-buf is done through the
* &drm_gem_object_funcs.export callback.
*/
int drm_gem_prime_handle_to_fd(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle,
uint32_t flags,
int *prime_fd)
{
struct dma_buf *dmabuf;
int fd = get_unused_fd_flags(flags);
if (fd < 0)
return fd;
dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags);
if (IS_ERR(dmabuf)) {
put_unused_fd(fd);
return PTR_ERR(dmabuf);
}
fd_install(fd, dmabuf->file);
*prime_fd = fd;
return 0;
}
EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);

View File

@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
int drm_gem_prime_fd_to_handle(struct drm_device *dev,
struct drm_file *file_priv, int prime_fd, uint32_t *handle);
struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle,
uint32_t flags);
int drm_gem_prime_handle_to_fd(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle, uint32_t flags,
int *prime_fd);

View File

@ -540,26 +540,29 @@ enum kfd_smi_event {
KFD_SMI_EVENT_ALL_PROCESS = 64
};
/* The reason of the page migration event */
enum KFD_MIGRATE_TRIGGERS {
KFD_MIGRATE_TRIGGER_PREFETCH,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
KFD_MIGRATE_TRIGGER_TTM_EVICTION
KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */
KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */
};
/* The reason of user queue evition event */
enum KFD_QUEUE_EVICTION_TRIGGERS {
KFD_QUEUE_EVICTION_TRIGGER_SVM,
KFD_QUEUE_EVICTION_TRIGGER_USERPTR,
KFD_QUEUE_EVICTION_TRIGGER_TTM,
KFD_QUEUE_EVICTION_TRIGGER_SUSPEND,
KFD_QUEUE_EVICTION_CRIU_CHECKPOINT,
KFD_QUEUE_EVICTION_CRIU_RESTORE
KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */
KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */
KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */
KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */
KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */
KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */
};
/* The reason of unmap buffer from GPU event */
enum KFD_SVM_UNMAP_TRIGGERS {
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY,
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */
};
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args {
__u32 anon_fd; /* from KFD */
};
/*
* SVM event tracing via SMI system management interface
*
* Open event file descriptor
* use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return a anonymous file
* descriptor to receive SMI events.
* If calling with sudo permission, then file descriptor can be used to receive
* SVM events from all processes, otherwise, to only receive SVM events of same
* process.
*
* To enable the SVM event
* Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap
* mask to start record the event to the kfifo, use bitmap mask combination
* for multiple events. New event mask will overwrite the previous event mask.
* KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo
* permisson to receive SVM events from all process.
*
* To receive the event
* Application can poll file descriptor to wait for the events, then read event
* from the file into a buffer. Each event is one line string message, starting
* with the event id, then the event specific information.
*
* To decode event information
* The following event format string macro can be used with sscanf to decode
* the specific event information.
* event triggers: the reason to generate the event, defined as enum for unmap,
* eviction and migrate events.
* node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory.
* addr: user mode address, in pages
* size: in pages
* pid: the process ID to generate the event
* ns: timestamp in nanosecond-resolution, starts at system boot time but
* stops during suspend
* migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
* rw: 'W' for write page fault, 'R' for read page fault
* rescheduled: 'R' if the queue restore failed and rescheduled to try again
*/
#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
"%x %s\n", (reset_seq_num), (reset_cause)
#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\
"%llx:%llx\n", (bitmask), (counter)
#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\
"%x:%s\n", (pid), (task_name)
#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\
"%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw)
#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\
"%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update)
#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\
preferred_loc, migrate_trigger)\
"%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
(from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
"%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
(from), (to), (migrate_trigger)
#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
"%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\
"%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\
"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
(node), (unmap_trigger)
/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*