amd-drm-fixes-6.13-2024-12-11:

amdgpu:
 - ISP hw init fix
 - SR-IOV fixes
 - Fix contiguous VRAM mapping for UVD on older GPUs
 - Fix some regressions due to drm scheduler changes
 - Workload profile fixes
 - Cleaner shader fix
 
 amdkfd:
 - Fix DMA map direction for migration
 - Fix a potential null pointer dereference
 - Cacheline size fixes
 - Runtime PM fix
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZ1oJLgAKCRC93/aFa7yZ
 2JoeAQCdBc9+GR9hLY7R6rjSNfwW0qQ7CovcKD/95BCvy9AtPAD9E/m7ULHPdQ6r
 cSwuvVzccsRM0Qnz74imXXARW+26rQ0=
 =Tb2v
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.13-2024-12-11' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.13-2024-12-11:

amdgpu:
- ISP hw init fix
- SR-IOV fixes
- Fix contiguous VRAM mapping for UVD on older GPUs
- Fix some regressions due to drm scheduler changes
- Workload profile fixes
- Cleaner shader fix

amdkfd:
- Fix DMA map direction for migration
- Fix a potential null pointer dereference
- Cacheline size fixes
- Runtime PM fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241211215449.741848-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2024-12-13 09:42:57 +10:00
commit d172ea67db
14 changed files with 97 additions and 53 deletions

View File

@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
/* Make sure VRAM is allocated contigiously */
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
for (i = 0; i < (*bo)->placement.num_placement; i++)
(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
!((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
for (i = 0; i < (*bo)->placement.num_placement; i++)
(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
}
return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}

View File

@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0)
#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
/*
* Default init level where all blocks are expected to be initialized. This is
* the level of initialization expected by default and also after a full reset

View File

@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
if (abo->placements[i].mem_type == TTM_PL_VRAM)
abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}

View File

@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
if (adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader &&
job->enforce_isolation)
ring->funcs->emit_cleaner_shader(ring);
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
!(job->enforce_isolation && !job->vmid))
return 0;
amdgpu_ring_ib_begin(ring);
@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
if (adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader &&
job->enforce_isolation)
ring->funcs->emit_cleaner_shader(ring);
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);

View File

@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@ -574,8 +576,12 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
{
int err;
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
"amdgpu/%s_mec.bin", chip_name);
if (amdgpu_sriov_vf(adev))
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
"amdgpu/%s_sjt_mec.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
"amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);

View File

@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */

View File

@ -1423,6 +1423,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info)
{
struct amdgpu_device *adev = kdev->adev;
@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Data Cache per SQC */
@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 64;
i++;
}
/* GL1 Data Cache per SA */
@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
if (cache_line_size_missing)
pcache_info[i].cache_line_size = 128;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
if (cache_line_size_missing && !pcache_info[i].cache_line_size)
pcache_info[i].cache_line_size = 128;
i++;
}
/* L3 Data Cache per GPU */
@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = 0;
pcache_info[i].cache_line_size = 64;
i++;
}
return i;
@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
@ -1692,10 +1703,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
/* Cacheline size not available in IP discovery for gc11.
* kfd_fill_gpu_cache_info_from_gfx_config to hard code it
*/
cache_line_size_missing = true;
fallthrough;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
cache_line_size_missing,
*pcache_info);
break;
default:
*pcache_info = dummy_cache_info;

View File

@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
if (!pdd->proc_ctx_cpu_ptr) {
r = amdgpu_amdkfd_alloc_gtt_mem(adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
false);
if (r) {
dev_err(adev->dev,
"failed to allocate process context bo\n");
return r;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
queue_input.process_id = qpd->pqm->process->pasid;
queue_input.page_table_base_addr = qpd->page_table_base;

View File

@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n",
@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);

View File

@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
if (pdd->dev->kfd->shared_resources.enable_mes)
if (pdd->dev->kfd->shared_resources.enable_mes &&
pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
&pdd->proc_ctx_bo);
/*
@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
if (dev->kfd->shared_resources.enable_mes) {
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_PROC_CTX_SIZE,
&pdd->proc_ctx_bo,
&pdd->proc_ctx_gpu_addr,
&pdd->proc_ctx_cpu_ptr,
false);
if (retval) {
dev_err(dev->adev->dev,
"failed to allocate process context bo\n");
goto err_free_pdd;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
idr_init(&pdd->alloc_idr);
return pdd;
err_free_pdd:
kfree(pdd);
return NULL;
}
/**

View File

@ -212,13 +212,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
struct kfd_process_device *pdd;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q) {
pdd = kfd_get_process_device_data(pqn->q->device, pqm->process);
kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
kfd_queue_release_buffers(pdd, &pqn->q->properties);
struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
pqm->process);
if (pdd) {
kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
kfd_queue_release_buffers(pdd, &pqn->q->properties);
} else {
WARN_ON(!pdd);
}
pqm_clean_queue_resource(pqm, pqn);
}

View File

@ -164,6 +164,7 @@ enum amd_pp_task {
};
enum PP_SMC_POWER_PROFILE {
PP_SMC_POWER_PROFILE_UNKNOWN = -1,
PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0,
PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1,
PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,

View File

@ -764,6 +764,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
smu->smu_baco.platform_support = false;
smu->smu_baco.maco_support = false;
smu->user_dpm_profile.fan_mode = -1;
smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN;
mutex_init(&smu->message_lock);
@ -1248,6 +1249,21 @@ static bool smu_is_workload_profile_available(struct smu_context *smu,
return smu->workload_map && smu->workload_map[profile].valid_mapping;
}
static void smu_init_power_profile(struct smu_context *smu)
{
if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) {
if (smu->is_apu ||
!smu_is_workload_profile_available(
smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
smu->power_profile_mode =
PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
else
smu->power_profile_mode =
PP_SMC_POWER_PROFILE_FULLSCREEN3D;
}
smu_power_profile_mode_get(smu, smu->power_profile_mode);
}
static int smu_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
@ -1269,13 +1285,7 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
if (smu->is_apu ||
!smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
else
smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
smu_power_profile_mode_get(smu, smu->power_profile_mode);
smu_init_power_profile(smu);
smu->display_config = &adev->pm.pm_display_cfg;
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;

View File

@ -2810,4 +2810,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
smu->workload_map = smu_v13_0_7_workload_map;
smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION;
smu_v13_0_set_smu_mailbox_registers(smu);
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
}