drm fixes for 6.13-rc3

i915:
 - Don't use indexed register writes needlessly [dsb]
 - Stop using non-posted DSB writes for legacy LUT [color]
 - Fix NULL pointer dereference in capture_engine
 - Fix memory leak by correcting cache object name in error handler
 
 xe:
 - Fix a KUNIT test error message (Mirsad Todorovac)
 - Fix an invalidation fence PM ref leak (Daniele)
 - Fix a register pool UAF (Lucas)
 
 amdgpu:
 - ISP hw init fix
 - SR-IOV fixes
 - Fix contiguous VRAM mapping for UVD on older GPUs
 - Fix some regressions due to drm scheduler changes
 - Workload profile fixes
 - Cleaner shader fix
 
 amdkfd:
 - Fix DMA map direction for migration
 - Fix a potential null pointer dereference
 - Cacheline size fixes
 - Runtime PM fix
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmdcpCsACgkQDHTzWXnE
 hr6RXg//Z3nd4gM4OU5AtSrfQitBa1WkLKzFzajkn3nCU2XOIWtikDaWXrUVcXIj
 s1FOaALnmyVW+BypxBQSGEzBxw1kzo5P4Xlvx9cmhNyDOe0PqupYVcDytdNyOrJF
 w6eh36T7KMT9fhFpp64mwPURtLWkb2BmsQ5ZRHeGJ1bbcV+NyPRu6UPT0CpRhR0L
 1h+maVKMpanpPzjzoxDmt6cgKLjIYq8e6JHNyOY+sn+hf0UP7eYp2D3qreasV4yK
 XnvizDvxoy8jmlPLHfXd5meEluDrg3zMNS0TlNfHdIOesida3qXhHK9EubxqlCeP
 cgbrqvw7BR2fFYozCpbkQaZtFu4dcZIoo/QmXGzMVXR/HCox20ZsmNF1G0Pigjlr
 21IGYv7QDxXp5+S6OUcZOvN8Z3U7OFMZzv2+kzNbm+Y7htdQG+uZ216o0X9HVJGe
 Fehs31QYVpmoYu38LbXUa/LmkgSVslGwwaxqNigRykE5Eia4WzdFAPED50m53wIE
 6izNamXJI+xbbwea3xSI1pLbEf/9LKADY5OaXlMxzm5jWkLQKZWeasGyJPtBDAe3
 4iEfcARRCrREI+WqWku9rxh8BupJmmiJrT4mFXpW6pFdDrtjZu5rkvmX4Hh4UTZf
 VrlnR4R+MfEn+IoKp3M6R/830HQcFuRkJnCn7ZmOTDrl07t7Fd4=
 =P9h4
 -----END PGP SIGNATURE-----

Merge tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "This is the weekly fixes pull for drm. Just has i915, xe and amdgpu
  changes in it. Nothing too major in here:

  i915:
   - Don't use indexed register writes needlessly [dsb]
   - Stop using non-posted DSB writes for legacy LUT [color]
   - Fix NULL pointer dereference in capture_engine
   - Fix memory leak by correcting cache object name in error handler

  xe:
   - Fix a KUNIT test error message (Mirsad Todorovac)
   - Fix an invalidation fence PM ref leak (Daniele)
   - Fix a register pool UAF (Lucas)

  amdgpu:
   - ISP hw init fix
   - SR-IOV fixes
   - Fix contiguous VRAM mapping for UVD on older GPUs
   - Fix some regressions due to drm scheduler changes
   - Workload profile fixes
   - Cleaner shader fix

  amdkfd:
   - Fix DMA map direction for migration
   - Fix a potential null pointer dereference
   - Cacheline size fixes
   - Runtime PM fix"

* tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel:
  drm/xe/reg_sr: Remove register pool
  drm/xe: Call invalidation_fence_fini for PT inval fences in error state
  drm/xe: fix the ERR_PTR() returned on failure to allocate tiny pt
  drm/amdkfd: pause autosuspend when creating pdd
  drm/amdgpu: fix when the cleaner shader is emitted
  drm/amdgpu: Fix ISP HW init issue
  drm/amdkfd: hard-code MALL cacheline size for gfx11, gfx12
  drm/amdkfd: hard-code cacheline size for gfx11
  drm/amdkfd: Dereference null return value
  drm/i915: Fix memory leak by correcting cache object name in error handler
  drm/i915: Fix NULL pointer dereference in capture_engine
  drm/i915/color: Stop using non-posted DSB writes for legacy LUT
  drm/i915/dsb: Don't use indexed register writes needlessly
  drm/amdkfd: Correct the migration DMA map direction
  drm/amd/pm: Set SMU v13.0.7 default workload type
  drm/amd/pm: Initialize power profile mode
  amdgpu/uvd: get ring reference from rq scheduler
  drm/amdgpu: fix UVD contiguous CS mapping problem
  drm/amdgpu: use sjt mec fw on gfx943 for sriov
  Revert "drm/amdgpu: Fix ISP hw init issue"
This commit is contained in:
Linus Torvalds 2024-12-13 16:58:39 -08:00
commit e72da82d5a
25 changed files with 200 additions and 125 deletions

View File

@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL; return -EINVAL;
/* Make sure VRAM is allocated contiguously */
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
!((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
for (i = 0; i < (*bo)->placement.num_placement; i++) for (i = 0; i < (*bo)->placement.num_placement; i++)
(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r) if (r)
return r; return r;
}
return amdgpu_ttm_alloc_gart(&(*bo)->tbo); return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
} }

View File

@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = {
"LAST", "LAST",
}; };
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) #define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
/* /*
* Default init level where all blocks are expected to be initialized. This is * Default init level where all blocks are expected to be initialized. This is
* the level of initialization expected by default and also after a full reset * the level of initialization expected by default and also after a full reset

View File

@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
for (i = 0; i < abo->placement.num_placement; ++i) { for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
if (abo->placements[i].mem_type == TTM_PL_VRAM)
abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
} }
} }

View File

@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg; ring->funcs->emit_wreg;
if (adev->gfx.enable_cleaner_shader && if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
ring->funcs->emit_cleaner_shader && !(job->enforce_isolation && !job->vmid))
job->enforce_isolation)
ring->funcs->emit_cleaner_shader(ring);
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0; return 0;
amdgpu_ring_ib_begin(ring); amdgpu_ring_ib_begin(ring);
@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (need_pipe_sync) if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring); amdgpu_ring_emit_pipeline_sync(ring);
if (adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader &&
job->enforce_isolation)
ring->funcs->emit_cleaner_shader(ring);
if (vm_flush_needed) { if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);

View File

@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
#define GFX9_MEC_HPD_SIZE 4096 #define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@ -574,6 +576,10 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
{ {
int err; int err;
if (amdgpu_sriov_vf(adev))
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
"amdgpu/%s_sjt_mec.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
"amdgpu/%s_mec.bin", chip_name); "amdgpu/%s_mec.bin", chip_name);
if (err) if (err)

View File

@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job, struct amdgpu_job *job,
struct amdgpu_ib *ib) struct amdgpu_ib *ib)
{ {
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i; unsigned i;
/* No patching necessary for the first instance */ /* No patching necessary for the first instance */

View File

@ -1423,6 +1423,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info) struct kfd_gpu_cache_info *pcache_info)
{ {
struct amdgpu_device *adev = kdev->adev; struct amdgpu_device *adev = kdev->adev;
@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++; i++;
} }
/* Scalar L1 Instruction Cache per SQC */ /* Scalar L1 Instruction Cache per SQC */
@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++; i++;
} }
/* Scalar L1 Data Cache per SQC */ /* Scalar L1 Data Cache per SQC */
@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 64;
i++; i++;
} }
/* GL1 Data Cache per SA */ /* GL1 Data Cache per SA */
@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0; if (cache_line_size_missing)
pcache_info[i].cache_line_size = 128;
i++; i++;
} }
/* L2 Data Cache per GPU (Total Tex Cache) */ /* L2 Data Cache per GPU (Total Tex Cache) */
@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++; i++;
} }
/* L3 Data Cache per GPU */ /* L3 Data Cache per GPU */
@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE); CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0; pcache_info[i].cache_line_size = 64;
i++; i++;
} }
return i; return i;
@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{ {
int num_of_cache_types = 0; int num_of_cache_types = 0;
bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) { switch (kdev->adev->asic_type) {
case CHIP_KAVERI: case CHIP_KAVERI:
@ -1692,10 +1703,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2): case IP_VERSION(11, 5, 2):
/* Cacheline size not available in IP discovery for gc11.
* kfd_fill_gpu_cache_info_from_gfx_config to hard code it
*/
cache_line_size_missing = true;
fallthrough;
case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1): case IP_VERSION(12, 0, 1):
num_of_cache_types = num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
cache_line_size_missing,
*pcache_info);
break; break;
default: default:
*pcache_info = dummy_cache_info; *pcache_info = dummy_cache_info;

View File

@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (!down_read_trylock(&adev->reset_domain->sem)) if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO; return -EIO;
if (!pdd->proc_ctx_cpu_ptr) {
r = amdgpu_amdkfd_alloc_gtt_mem(adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
false);
if (r) {
dev_err(adev->dev,
"failed to allocate process context bo\n");
return r;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
queue_input.process_id = qpd->pqm->process->pasid; queue_input.process_id = qpd->pqm->process->pasid;
queue_input.page_table_base_addr = qpd->page_table_base; queue_input.page_table_base_addr = qpd->page_table_base;

View File

@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
spage = migrate_pfn_to_page(migrate->src[i]); spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) { if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE); DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]); r = dma_mapping_error(dev, src[i]);
if (r) { if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n", dev_err(dev, "%s: fail %d dma_map_page\n",
@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom; goto out_oom;
} }
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]); r = dma_mapping_error(dev, dst[i]);
if (r) { if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);

View File

@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd); kfd_free_process_doorbells(pdd->dev->kfd, pdd);
if (pdd->dev->kfd->shared_resources.enable_mes) if (pdd->dev->kfd->shared_resources.enable_mes &&
pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
&pdd->proc_ctx_bo); &pdd->proc_ctx_bo);
/* /*
@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p) struct kfd_process *p)
{ {
struct kfd_process_device *pdd = NULL; struct kfd_process_device *pdd = NULL;
int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL; return NULL;
@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->user_gpu_id = dev->id; pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0); atomic64_set(&pdd->evict_duration_counter, 0);
if (dev->kfd->shared_resources.enable_mes) {
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
false);
if (retval) {
dev_err(dev->adev->dev,
"failed to allocate process context bo\n");
goto err_free_pdd;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
p->pdds[p->n_pdds++] = pdd; p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev)) if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
idr_init(&pdd->alloc_idr); idr_init(&pdd->alloc_idr);
return pdd; return pdd;
err_free_pdd:
kfree(pdd);
return NULL;
} }
/** /**

View File

@ -212,13 +212,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
void pqm_uninit(struct process_queue_manager *pqm) void pqm_uninit(struct process_queue_manager *pqm)
{ {
struct process_queue_node *pqn, *next; struct process_queue_node *pqn, *next;
struct kfd_process_device *pdd;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q) { if (pqn->q) {
pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
pqm->process);
if (pdd) {
kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
kfd_queue_release_buffers(pdd, &pqn->q->properties); kfd_queue_release_buffers(pdd, &pqn->q->properties);
} else {
WARN_ON(!pdd);
}
pqm_clean_queue_resource(pqm, pqn); pqm_clean_queue_resource(pqm, pqn);
} }

View File

@ -164,6 +164,7 @@ enum amd_pp_task {
}; };
enum PP_SMC_POWER_PROFILE { enum PP_SMC_POWER_PROFILE {
PP_SMC_POWER_PROFILE_UNKNOWN = -1,
PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0,
PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1,
PP_SMC_POWER_PROFILE_POWERSAVING = 0x2, PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,

View File

@ -764,6 +764,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
smu->smu_baco.platform_support = false; smu->smu_baco.platform_support = false;
smu->smu_baco.maco_support = false; smu->smu_baco.maco_support = false;
smu->user_dpm_profile.fan_mode = -1; smu->user_dpm_profile.fan_mode = -1;
smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN;
mutex_init(&smu->message_lock); mutex_init(&smu->message_lock);
@ -1248,6 +1249,21 @@ static bool smu_is_workload_profile_available(struct smu_context *smu,
return smu->workload_map && smu->workload_map[profile].valid_mapping; return smu->workload_map && smu->workload_map[profile].valid_mapping;
} }
static void smu_init_power_profile(struct smu_context *smu)
{
if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) {
if (smu->is_apu ||
!smu_is_workload_profile_available(
smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
smu->power_profile_mode =
PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
else
smu->power_profile_mode =
PP_SMC_POWER_PROFILE_FULLSCREEN3D;
}
smu_power_profile_mode_get(smu, smu->power_profile_mode);
}
static int smu_sw_init(struct amdgpu_ip_block *ip_block) static int smu_sw_init(struct amdgpu_ip_block *ip_block)
{ {
struct amdgpu_device *adev = ip_block->adev; struct amdgpu_device *adev = ip_block->adev;
@ -1269,13 +1285,7 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
if (smu->is_apu || smu_init_power_profile(smu);
!smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
else
smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
smu_power_profile_mode_get(smu, smu->power_profile_mode);
smu->display_config = &adev->pm.pm_display_cfg; smu->display_config = &adev->pm.pm_display_cfg;
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;

View File

@ -2810,4 +2810,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
smu->workload_map = smu_v13_0_7_workload_map; smu->workload_map = smu_v13_0_7_workload_map;
smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION;
smu_v13_0_set_smu_mailbox_registers(smu); smu_v13_0_set_smu_mailbox_registers(smu);
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
} }

View File

@ -1343,6 +1343,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state,
intel_de_write_fw(display, reg, val); intel_de_write_fw(display, reg, val);
} }
static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state,
i915_reg_t reg, u32 val)
{
struct intel_display *display = to_intel_display(crtc_state);
if (crtc_state->dsb_color_vblank)
intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val);
else
intel_de_write_fw(display, reg, val);
}
static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state, static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
const struct drm_property_blob *blob) const struct drm_property_blob *blob)
{ {
@ -1357,19 +1368,29 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
lut = blob->data; lut = blob->data;
/* /*
* DSB fails to correctly load the legacy LUT * DSB fails to correctly load the legacy LUT unless
* unless we either write each entry twice, * we either write each entry twice when using posted
* or use non-posted writes * writes, or we use non-posted writes.
*
* If palette anti-collision is active during LUT
* register writes:
* - posted writes simply get dropped and thus the LUT
* contents may not be correctly updated
* - non-posted writes are blocked and thus the LUT
* contents are always correct, but simultaneous CPU
* MMIO access will start to fail
*
* Choose the lesser of two evils and use posted writes.
* Using posted writes is also faster, even when having
* to write each register twice.
*/ */
if (crtc_state->dsb_color_vblank) for (i = 0; i < 256; i++) {
intel_dsb_nonpost_start(crtc_state->dsb_color_vblank);
for (i = 0; i < 256; i++)
ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i), ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i),
i9xx_lut_8(&lut[i])); i9xx_lut_8(&lut[i]));
if (crtc_state->dsb_color_vblank) if (crtc_state->dsb_color_vblank)
intel_dsb_nonpost_end(crtc_state->dsb_color_vblank); ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i),
i9xx_lut_8(&lut[i]));
}
} }
static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state, static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state,
@ -1458,7 +1479,7 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state,
prec_index); prec_index);
for (i = 0; i < lut_size; i++) for (i = 0; i < lut_size; i++)
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
ilk_lut_10(&lut[i])); ilk_lut_10(&lut[i]));
/* /*
@ -1612,14 +1633,14 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
* ToDo: Extend to max 7.0. Enable 32 bit input value * ToDo: Extend to max 7.0. Enable 32 bit input value
* as compared to just 16 to achieve this. * as compared to just 16 to achieve this.
*/ */
ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
DISPLAY_VER(display) >= 14 ? DISPLAY_VER(display) >= 14 ?
mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i])); mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
} }
/* Clamp values > 1.0. */ /* Clamp values > 1.0. */
while (i++ < glk_degamma_lut_size(display)) while (i++ < glk_degamma_lut_size(display))
ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe), ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
DISPLAY_VER(display) >= 14 ? DISPLAY_VER(display) >= 14 ?
1 << 24 : 1 << 16); 1 << 24 : 1 << 16);
@ -1687,9 +1708,9 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
for (i = 0; i < 9; i++) { for (i = 0; i < 9; i++) {
const struct drm_color_lut *entry = &lut[i]; const struct drm_color_lut *entry = &lut[i];
ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
ilk_lut_12p4_ldw(entry)); ilk_lut_12p4_ldw(entry));
ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
ilk_lut_12p4_udw(entry)); ilk_lut_12p4_udw(entry));
} }
@ -1726,9 +1747,9 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
for (i = 1; i < 257; i++) { for (i = 1; i < 257; i++) {
entry = &lut[i * 8]; entry = &lut[i * 8];
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
ilk_lut_12p4_ldw(entry)); ilk_lut_12p4_ldw(entry));
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
ilk_lut_12p4_udw(entry)); ilk_lut_12p4_udw(entry));
} }
@ -1747,9 +1768,9 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
entry = &lut[i * 8 * 128]; entry = &lut[i * 8 * 128];
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
ilk_lut_12p4_ldw(entry)); ilk_lut_12p4_ldw(entry));
ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe), ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
ilk_lut_12p4_udw(entry)); ilk_lut_12p4_udw(entry));
} }

View File

@ -273,15 +273,19 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_
} }
/** /**
* intel_dsb_reg_write() - Emit register write to the DSB context * intel_dsb_reg_write_indexed() - Emit register write to the DSB context
* @dsb: DSB context * @dsb: DSB context
* @reg: register address. * @reg: register address.
* @val: value. * @val: value.
* *
* This function is used for writing register-value pair in command * This function is used for writing register-value pair in command
* buffer of DSB. * buffer of DSB.
*
* Note that indexed writes are slower than normal MMIO writes
* for a small number (less than 5 or so) of writes to the same
* register.
*/ */
void intel_dsb_reg_write(struct intel_dsb *dsb, void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
i915_reg_t reg, u32 val) i915_reg_t reg, u32 val)
{ {
/* /*
@ -340,6 +344,15 @@ void intel_dsb_reg_write(struct intel_dsb *dsb,
} }
} }
void intel_dsb_reg_write(struct intel_dsb *dsb,
i915_reg_t reg, u32 val)
{
intel_dsb_emit(dsb, val,
(DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
(DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
i915_mmio_reg_offset(reg));
}
static u32 intel_dsb_mask_to_byte_en(u32 mask) static u32 intel_dsb_mask_to_byte_en(u32 mask)
{ {
return (!!(mask & 0xff000000) << 3 | return (!!(mask & 0xff000000) << 3 |

View File

@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb);
void intel_dsb_cleanup(struct intel_dsb *dsb); void intel_dsb_cleanup(struct intel_dsb *dsb);
void intel_dsb_reg_write(struct intel_dsb *dsb, void intel_dsb_reg_write(struct intel_dsb *dsb,
i915_reg_t reg, u32 val); i915_reg_t reg, u32 val);
void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
i915_reg_t reg, u32 val);
void intel_dsb_reg_write_masked(struct intel_dsb *dsb, void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
i915_reg_t reg, u32 mask, u32 val); i915_reg_t reg, u32 mask, u32 val);
void intel_dsb_noop(struct intel_dsb *dsb, int count); void intel_dsb_noop(struct intel_dsb *dsb, int count);

View File

@ -1643,9 +1643,21 @@ capture_engine(struct intel_engine_cs *engine,
return NULL; return NULL;
intel_engine_get_hung_entity(engine, &ce, &rq); intel_engine_get_hung_entity(engine, &ce, &rq);
if (rq && !i915_request_started(rq)) if (rq && !i915_request_started(rq)) {
drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n", /*
* We want to know also what is the guc_id of the context,
* but if we don't have the context reference, then skip
* printing it.
*/
if (ce)
drm_info(&engine->gt->i915->drm,
"Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id); engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
else
drm_info(&engine->gt->i915->drm,
"Got hung context on %s with active request %lld:%lld not yet started\n",
engine->name, rq->fence.context, rq->fence.seqno);
}
if (rq) { if (rq) {
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);

View File

@ -506,6 +506,6 @@ int __init i915_scheduler_module_init(void)
return 0; return 0;
err_priorities: err_priorities:
kmem_cache_destroy(slab_priorities); kmem_cache_destroy(slab_dependencies);
return -ENOMEM; return -ENOMEM;
} }

View File

@ -224,8 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_PINNED); XE_BO_FLAG_PINNED);
if (IS_ERR(tiny)) { if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(pt)); PTR_ERR(tiny));
goto free_pt; goto free_pt;
} }

View File

@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
__invalidation_fence_signal(xe, fence); __invalidation_fence_signal(xe, fence);
} }
void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
{
if (WARN_ON_ONCE(!fence->gt))
return;
__invalidation_fence_signal(gt_to_xe(fence->gt), fence);
}
static void xe_gt_tlb_fence_timeout(struct work_struct *work) static void xe_gt_tlb_fence_timeout(struct work_struct *work)
{ {
struct xe_gt *gt = container_of(work, struct xe_gt, struct xe_gt *gt = container_of(work, struct xe_gt,

View File

@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence, struct xe_gt_tlb_invalidation_fence *fence,
bool stack); bool stack);
void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence);
static inline void static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)

View File

@ -1333,8 +1333,7 @@ static void invalidation_fence_cb(struct dma_fence *fence,
queue_work(system_wq, &ifence->work); queue_work(system_wq, &ifence->work);
} else { } else {
ifence->base.base.error = ifence->fence->error; ifence->base.base.error = ifence->fence->error;
dma_fence_signal(&ifence->base.base); xe_gt_tlb_invalidation_fence_signal(&ifence->base);
dma_fence_put(&ifence->base.base);
} }
dma_fence_put(ifence->fence); dma_fence_put(ifence->fence);
} }

View File

@ -27,46 +27,27 @@
#include "xe_reg_whitelist.h" #include "xe_reg_whitelist.h"
#include "xe_rtp_types.h" #include "xe_rtp_types.h"
#define XE_REG_SR_GROW_STEP_DEFAULT 16
static void reg_sr_fini(struct drm_device *drm, void *arg) static void reg_sr_fini(struct drm_device *drm, void *arg)
{ {
struct xe_reg_sr *sr = arg; struct xe_reg_sr *sr = arg;
struct xe_reg_sr_entry *entry;
unsigned long reg;
xa_for_each(&sr->xa, reg, entry)
kfree(entry);
xa_destroy(&sr->xa); xa_destroy(&sr->xa);
kfree(sr->pool.arr);
memset(&sr->pool, 0, sizeof(sr->pool));
} }
int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
{ {
xa_init(&sr->xa); xa_init(&sr->xa);
memset(&sr->pool, 0, sizeof(sr->pool));
sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
sr->name = name; sr->name = name;
return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
} }
EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
{
if (sr->pool.used == sr->pool.allocated) {
struct xe_reg_sr_entry *arr;
arr = krealloc_array(sr->pool.arr,
ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
sizeof(*arr), GFP_KERNEL);
if (!arr)
return NULL;
sr->pool.arr = arr;
sr->pool.allocated += sr->pool.grow_step;
}
return &sr->pool.arr[sr->pool.used++];
}
static bool compatible_entries(const struct xe_reg_sr_entry *e1, static bool compatible_entries(const struct xe_reg_sr_entry *e1,
const struct xe_reg_sr_entry *e2) const struct xe_reg_sr_entry *e2)
{ {
@ -112,7 +93,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
return 0; return 0;
} }
pentry = alloc_entry(sr); pentry = kmalloc(sizeof(*pentry), GFP_KERNEL);
if (!pentry) { if (!pentry) {
ret = -ENOMEM; ret = -ENOMEM;
goto fail; goto fail;

View File

@ -20,12 +20,6 @@ struct xe_reg_sr_entry {
}; };
struct xe_reg_sr { struct xe_reg_sr {
struct {
struct xe_reg_sr_entry *arr;
unsigned int used;
unsigned int allocated;
unsigned int grow_step;
} pool;
struct xarray xa; struct xarray xa;
const char *name; const char *name;