drm fixes for 6.13-rc3
Merge tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "This is the weekly fixes pull for drm. Just has i915, xe and amdgpu
  changes in it. Nothing too major in here:

  i915:
   - Don't use indexed register writes needlessly [dsb]
   - Stop using non-posted DSB writes for legacy LUT [color]
   - Fix NULL pointer dereference in capture_engine
   - Fix memory leak by correcting cache object name in error handler

  xe:
   - Fix a KUNIT test error message (Mirsad Todorovac)
   - Fix an invalidation fence PM ref leak (Daniele)
   - Fix a register pool UAF (Lucas)

  amdgpu:
   - ISP hw init fix
   - SR-IOV fixes
   - Fix contiguous VRAM mapping for UVD on older GPUs
   - Fix some regressions due to drm scheduler changes
   - Workload profile fixes
   - Cleaner shader fix

  amdkfd:
   - Fix DMA map direction for migration
   - Fix a potential null pointer dereference
   - Cacheline size fixes
   - Runtime PM fix"

* tag 'drm-fixes-2024-12-14' of https://gitlab.freedesktop.org/drm/kernel:
  drm/xe/reg_sr: Remove register pool
  drm/xe: Call invalidation_fence_fini for PT inval fences in error state
  drm/xe: fix the ERR_PTR() returned on failure to allocate tiny pt
  drm/amdkfd: pause autosuspend when creating pdd
  drm/amdgpu: fix when the cleaner shader is emitted
  drm/amdgpu: Fix ISP HW init issue
  drm/amdkfd: hard-code MALL cacheline size for gfx11, gfx12
  drm/amdkfd: hard-code cacheline size for gfx11
  drm/amdkfd: Dereference null return value
  drm/i915: Fix memory leak by correcting cache object name in error handler
  drm/i915: Fix NULL pointer dereference in capture_engine
  drm/i915/color: Stop using non-posted DSB writes for legacy LUT
  drm/i915/dsb: Don't use indexed register writes needlessly
  drm/amdkfd: Correct the migration DMA map direction
  drm/amd/pm: Set SMU v13.0.7 default workload type
  drm/amd/pm: Initialize power profile mode
  amdgpu/uvd: get ring reference from rq scheduler
  drm/amdgpu: fix UVD contiguous CS mapping problem
  drm/amdgpu: use sjt mec fw on gfx943 for sriov
  Revert "drm/amdgpu: Fix ISP hw init issue"
commit e72da82d5a
@@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
 		return -EINVAL;
 
 	/* Make sure VRAM is allocated contigiously */
 	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
-	amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
-	for (i = 0; i < (*bo)->placement.num_placement; i++)
-		(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
-	r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
-	if (r)
-		return r;
+	if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+	    !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
+		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+		for (i = 0; i < (*bo)->placement.num_placement; i++)
+			(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+		if (r)
+			return r;
+	}
 
 	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
 }
@@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
-#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0)
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
 /*
  * Default init level where all blocks are expected to be initialized. This is
  * the level of initialization expected by default and also after a full reset
@@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
 	for (i = 0; i < abo->placement.num_placement; ++i) {
 		abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
 		abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+		if (abo->placements[i].mem_type == TTM_PL_VRAM)
+			abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
 	}
 }
 
@@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
 		ring->funcs->emit_wreg;
 
-	if (adev->gfx.enable_cleaner_shader &&
-	    ring->funcs->emit_cleaner_shader &&
-	    job->enforce_isolation)
-		ring->funcs->emit_cleaner_shader(ring);
-
-	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+	    !(job->enforce_isolation && !job->vmid))
 		return 0;
 
 	amdgpu_ring_ib_begin(ring);
@@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
+	if (adev->gfx.enable_cleaner_shader &&
+	    ring->funcs->emit_cleaner_shader &&
+	    job->enforce_isolation)
+		ring->funcs->emit_cleaner_shader(ring);
+
 	if (vm_flush_needed) {
 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
 MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
 
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -574,8 +576,12 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
 {
 	int err;
 
-	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
-				   "amdgpu/%s_mec.bin", chip_name);
+	if (amdgpu_sriov_vf(adev))
+		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+					   "amdgpu/%s_sjt_mec.bin", chip_name);
+	else
+		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+					   "amdgpu/%s_mec.bin", chip_name);
 	if (err)
 		goto out;
 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
@@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 					   struct amdgpu_job *job,
 					   struct amdgpu_ib *ib)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+	struct amdgpu_ring *ring = amdgpu_job_ring(job);
 	unsigned i;
 
 	/* No patching necessary for the first instance */
@@ -1423,6 +1423,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 
 
 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+						    bool cache_line_size_missing,
 						    struct kfd_gpu_cache_info *pcache_info)
 {
 	struct amdgpu_device *adev = kdev->adev;
@@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* Scalar L1 Instruction Cache per SQC */
@@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* Scalar L1 Data Cache per SQC */
@@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 64;
 		i++;
 	}
 	/* GL1 Data Cache per SA */
@@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_DATA_CACHE |
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-		pcache_info[i].cache_line_size = 0;
+		if (cache_line_size_missing)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
 		pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+		if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+			pcache_info[i].cache_line_size = 128;
 		i++;
 	}
 	/* L3 Data Cache per GPU */
@@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 					CRAT_CACHE_FLAGS_DATA_CACHE |
 					CRAT_CACHE_FLAGS_SIMD_CACHE);
 		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
-		pcache_info[i].cache_line_size = 0;
+		pcache_info[i].cache_line_size = 64;
 		i++;
 	}
 	return i;
@@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
 int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
 {
 	int num_of_cache_types = 0;
+	bool cache_line_size_missing = false;
 
 	switch (kdev->adev->asic_type) {
 	case CHIP_KAVERI:
@@ -1692,10 +1703,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
 	case IP_VERSION(11, 5, 0):
 	case IP_VERSION(11, 5, 1):
 	case IP_VERSION(11, 5, 2):
+		/* Cacheline size not available in IP discovery for gc11.
+		 * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
+		 */
+		cache_line_size_missing = true;
+		fallthrough;
 	case IP_VERSION(12, 0, 0):
 	case IP_VERSION(12, 0, 1):
 		num_of_cache_types =
-			kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+			kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+								cache_line_size_missing,
+								*pcache_info);
 		break;
 	default:
 		*pcache_info = dummy_cache_info;
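In the hunks above, gfx11 falls back to known cacheline sizes (128 bytes, or 64 for the scalar and L3 data caches) whenever IP discovery reports zero, signalled through the new cache_line_size_missing parameter. A tiny standalone C sketch of that fill-in-missing-defaults pattern; the cache names and values here are made up for illustration, not taken from the driver:

#include <stdio.h>

struct cache_info {
	const char *name;
	unsigned int line_size;	/* 0 means "not reported by discovery" */
};

/* Apply a hard-coded default only when the reported value is missing. */
static void fix_line_size(struct cache_info *c, int missing, unsigned int fallback)
{
	if (missing && !c->line_size)
		c->line_size = fallback;
}

int main(void)
{
	struct cache_info caches[] = {
		{ "L1 data", 0 },	/* discovery gave nothing, gets the fallback */
		{ "L2",     64 },	/* discovery value is kept as-is */
	};

	for (unsigned int i = 0; i < 2; i++) {
		fix_line_size(&caches[i], 1, 128);
		printf("%s cacheline: %u bytes\n", caches[i].name, caches[i].line_size);
	}
	return 0;
}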
@@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 	if (!down_read_trylock(&adev->reset_domain->sem))
 		return -EIO;
 
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+						AMDGPU_MES_PROC_CTX_SIZE,
+						&pdd->proc_ctx_bo,
+						&pdd->proc_ctx_gpu_addr,
+						&pdd->proc_ctx_cpu_ptr,
+						false);
+		if (r) {
+			dev_err(adev->dev,
+				"failed to allocate process context bo\n");
+			return r;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
 	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
 	queue_input.process_id = qpd->pqm->process->pasid;
 	queue_input.page_table_base_addr = qpd->page_table_base;
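Together with the kfd_process.c hunks further down, this moves the MES per-process context buffer from eager allocation at process-device creation to lazy allocation on first queue creation, so the buffer only exists for processes that actually add a queue. A minimal standalone C sketch of the same allocate-on-first-use pattern; the names, size and plain malloc() are illustrative stand-ins, not the driver's API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CTX_SIZE 4096	/* illustrative size, not AMDGPU_MES_PROC_CTX_SIZE */

struct proc {
	void *ctx;	/* lazily allocated context buffer */
};

/* Allocate the context the first time a queue is added, not at process creation. */
static int add_queue(struct proc *p)
{
	if (!p->ctx) {
		p->ctx = malloc(CTX_SIZE);
		if (!p->ctx) {
			fprintf(stderr, "failed to allocate process context\n");
			return -1;
		}
		memset(p->ctx, 0, CTX_SIZE);
	}
	/* ... program the queue using p->ctx ... */
	return 0;
}

/* Teardown must tolerate the buffer never having been allocated. */
static void destroy_proc(struct proc *p)
{
	free(p->ctx);
	p->ctx = NULL;
}

int main(void)
{
	struct proc p = { 0 };

	destroy_proc(&p);	/* fine: nothing was ever allocated */
	if (add_queue(&p) == 0)
		printf("queue added, context allocated lazily\n");
	destroy_proc(&p);
	return 0;
}

Teardown having to tolerate the never-allocated case is exactly what the kfd_process_destroy_pdds() hunk below adds with the extra proc_ctx_cpu_ptr check.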
@@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
 		spage = migrate_pfn_to_page(migrate->src[i]);
 		if (spage && !is_zone_device_page(spage)) {
 			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
-					      DMA_TO_DEVICE);
+					      DMA_BIDIRECTIONAL);
 			r = dma_mapping_error(dev, src[i]);
 			if (r) {
 				dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 			goto out_oom;
 		}
 
-		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
 		r = dma_mapping_error(dev, dst[i]);
 		if (r) {
 			dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
@@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
 		kfd_free_process_doorbells(pdd->dev->kfd, pdd);
 
-		if (pdd->dev->kfd->shared_resources.enable_mes)
+		if (pdd->dev->kfd->shared_resources.enable_mes &&
+		    pdd->proc_ctx_cpu_ptr)
 			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
 						   &pdd->proc_ctx_bo);
 		/*
@@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 							struct kfd_process *p)
 {
 	struct kfd_process_device *pdd = NULL;
-	int retval = 0;
 
 	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
 		return NULL;
@@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
 
-	if (dev->kfd->shared_resources.enable_mes) {
-		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
-						AMDGPU_MES_PROC_CTX_SIZE,
-						&pdd->proc_ctx_bo,
-						&pdd->proc_ctx_gpu_addr,
-						&pdd->proc_ctx_cpu_ptr,
-						false);
-		if (retval) {
-			dev_err(dev->adev->dev,
-				"failed to allocate process context bo\n");
-			goto err_free_pdd;
-		}
-		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
-	}
-
 	p->pdds[p->n_pdds++] = pdd;
 	if (kfd_dbg_is_per_vmid_supported(pdd->dev))
 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	idr_init(&pdd->alloc_idr);
 
 	return pdd;
-
-err_free_pdd:
-	kfree(pdd);
-	return NULL;
 }
 
 /**
@@ -212,13 +212,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
 void pqm_uninit(struct process_queue_manager *pqm)
 {
 	struct process_queue_node *pqn, *next;
-	struct kfd_process_device *pdd;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
 		if (pqn->q) {
-			pdd = kfd_get_process_device_data(pqn->q->device, pqm->process);
-			kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
-			kfd_queue_release_buffers(pdd, &pqn->q->properties);
+			struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+										      pqm->process);
+			if (pdd) {
+				kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+				kfd_queue_release_buffers(pdd, &pqn->q->properties);
+			} else {
+				WARN_ON(!pdd);
+			}
 			pqm_clean_queue_resource(pqm, pqn);
 		}
 
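This fix stops dereferencing the result of kfd_get_process_device_data() unconditionally during teardown and instead warns and keeps cleaning up when the lookup fails. A small standalone C sketch of the same defensive pattern; the lookup and names are invented for illustration:

#include <stdio.h>

struct device_data { int id; };

/* Stand-in for a lookup that can legitimately return NULL during teardown. */
static struct device_data *lookup(int present)
{
	static struct device_data d = { .id = 42 };
	return present ? &d : NULL;
}

static void release_queue(int present)
{
	struct device_data *dd = lookup(present);

	if (dd) {
		/* normal path: release resources that need the lookup result */
		printf("releasing resources for device %d\n", dd->id);
	} else {
		/* unexpected, but do not crash: report and keep cleaning up */
		fprintf(stderr, "warning: no device data, skipping per-device release\n");
	}
	/* resources not tied to the lookup are cleaned up either way */
	printf("queue resource cleaned\n");
}

int main(void)
{
	release_queue(1);
	release_queue(0);
	return 0;
}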
@@ -164,6 +164,7 @@ enum amd_pp_task {
 };
 
 enum PP_SMC_POWER_PROFILE {
+	PP_SMC_POWER_PROFILE_UNKNOWN = -1,
 	PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0,
 	PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1,
 	PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,
@@ -764,6 +764,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
 	smu->smu_baco.platform_support = false;
 	smu->smu_baco.maco_support = false;
 	smu->user_dpm_profile.fan_mode = -1;
+	smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN;
 
 	mutex_init(&smu->message_lock);
 
@@ -1248,6 +1249,21 @@ static bool smu_is_workload_profile_available(struct smu_context *smu,
 	return smu->workload_map && smu->workload_map[profile].valid_mapping;
 }
 
+static void smu_init_power_profile(struct smu_context *smu)
+{
+	if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) {
+		if (smu->is_apu ||
+		    !smu_is_workload_profile_available(
+			    smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
+			smu->power_profile_mode =
+				PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+		else
+			smu->power_profile_mode =
+				PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+	}
+	smu_power_profile_mode_get(smu, smu->power_profile_mode);
+}
+
 static int smu_sw_init(struct amdgpu_ip_block *ip_block)
 {
 	struct amdgpu_device *adev = ip_block->adev;
@@ -1269,13 +1285,7 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
 	atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
 	atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
 
-	if (smu->is_apu ||
-	    !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
-		smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
-	else
-		smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
-	smu_power_profile_mode_get(smu, smu->power_profile_mode);
-
+	smu_init_power_profile(smu);
 	smu->display_config = &adev->pm.pm_display_cfg;
 
 	smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
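The power-profile hunks introduce a PP_SMC_POWER_PROFILE_UNKNOWN sentinel stored by smu_early_init(); smu_init_power_profile() later picks a generic default only if nothing chip-specific (such as the SMU 13.0.7 hunk below) has already chosen one. A standalone C sketch of that sentinel-then-resolve pattern, with invented names and values:

#include <stdio.h>

enum profile {
	PROFILE_UNKNOWN = -1,
	PROFILE_BOOTUP_DEFAULT = 0,
	PROFILE_FULLSCREEN3D = 1,
};

struct ctx {
	int is_apu;
	int fullscreen3d_supported;
	enum profile profile;	/* starts as the sentinel */
};

/* Early init: record "not chosen yet" rather than guessing a default. */
static void early_init(struct ctx *c)
{
	c->profile = PROFILE_UNKNOWN;
}

/* Per-chip code may pick its own default before the generic resolution runs. */
static void chip_specific_init(struct ctx *c)
{
	c->profile = PROFILE_BOOTUP_DEFAULT;
}

/* Later init: only choose a generic default if nobody chose one already. */
static void resolve_profile(struct ctx *c)
{
	if (c->profile != PROFILE_UNKNOWN)
		return;
	if (c->is_apu || !c->fullscreen3d_supported)
		c->profile = PROFILE_BOOTUP_DEFAULT;
	else
		c->profile = PROFILE_FULLSCREEN3D;
}

int main(void)
{
	struct ctx a = { .is_apu = 0, .fullscreen3d_supported = 1 };
	struct ctx b = { .is_apu = 0, .fullscreen3d_supported = 1 };

	early_init(&a);
	resolve_profile(&a);
	printf("generic path picks profile %d\n", a.profile);

	early_init(&b);
	chip_specific_init(&b);	/* analogous to the SMU 13.0.7 override */
	resolve_profile(&b);
	printf("overridden path keeps profile %d\n", b.profile);
	return 0;
}

Using an explicit "unknown" value avoids treating 0, which is the valid BOOTUP_DEFAULT profile, as "not chosen yet".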
@@ -2810,4 +2810,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
 	smu->workload_map = smu_v13_0_7_workload_map;
 	smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION;
 	smu_v13_0_set_smu_mailbox_registers(smu);
+	smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
 }
@@ -1343,6 +1343,17 @@ static void ilk_lut_write(const struct intel_crtc_state *crtc_state,
 		intel_de_write_fw(display, reg, val);
 }
 
+static void ilk_lut_write_indexed(const struct intel_crtc_state *crtc_state,
+				  i915_reg_t reg, u32 val)
+{
+	struct intel_display *display = to_intel_display(crtc_state);
+
+	if (crtc_state->dsb_color_vblank)
+		intel_dsb_reg_write_indexed(crtc_state->dsb_color_vblank, reg, val);
+	else
+		intel_de_write_fw(display, reg, val);
+}
+
 static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
 			   const struct drm_property_blob *blob)
 {
@@ -1357,19 +1368,29 @@ static void ilk_load_lut_8(const struct intel_crtc_state *crtc_state,
 	lut = blob->data;
 
 	/*
-	 * DSB fails to correctly load the legacy LUT
-	 * unless we either write each entry twice,
-	 * or use non-posted writes
+	 * DSB fails to correctly load the legacy LUT unless
+	 * we either write each entry twice when using posted
+	 * writes, or we use non-posted writes.
+	 *
+	 * If palette anti-collision is active during LUT
+	 * register writes:
+	 * - posted writes simply get dropped and thus the LUT
+	 *   contents may not be correctly updated
+	 * - non-posted writes are blocked and thus the LUT
+	 *   contents are always correct, but simultaneous CPU
+	 *   MMIO access will start to fail
+	 *
+	 * Choose the lesser of two evils and use posted writes.
+	 * Using posted writes is also faster, even when having
+	 * to write each register twice.
 	 */
-	if (crtc_state->dsb_color_vblank)
-		intel_dsb_nonpost_start(crtc_state->dsb_color_vblank);
-
-	for (i = 0; i < 256; i++)
+	for (i = 0; i < 256; i++) {
 		ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i),
 			      i9xx_lut_8(&lut[i]));
 
-	if (crtc_state->dsb_color_vblank)
-		intel_dsb_nonpost_end(crtc_state->dsb_color_vblank);
+		if (crtc_state->dsb_color_vblank)
+			ilk_lut_write(crtc_state, LGC_PALETTE(pipe, i),
+				      i9xx_lut_8(&lut[i]));
+	}
 }
 
 static void ilk_load_lut_10(const struct intel_crtc_state *crtc_state,
@@ -1458,8 +1479,8 @@ static void bdw_load_lut_10(const struct intel_crtc_state *crtc_state,
 			    prec_index);
 
 	for (i = 0; i < lut_size; i++)
-		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
-			      ilk_lut_10(&lut[i]));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
+				      ilk_lut_10(&lut[i]));
 
 	/*
 	 * Reset the index, otherwise it prevents the legacy palette to be
@@ -1612,16 +1633,16 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
 		 * ToDo: Extend to max 7.0. Enable 32 bit input value
 		 * as compared to just 16 to achieve this.
 		 */
-		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
-			      DISPLAY_VER(display) >= 14 ?
-			      mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
+		ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
+				      DISPLAY_VER(display) >= 14 ?
+				      mtl_degamma_lut(&lut[i]) : glk_degamma_lut(&lut[i]));
 	}
 
 	/* Clamp values > 1.0. */
 	while (i++ < glk_degamma_lut_size(display))
-		ilk_lut_write(crtc_state, PRE_CSC_GAMC_DATA(pipe),
-			      DISPLAY_VER(display) >= 14 ?
-			      1 << 24 : 1 << 16);
+		ilk_lut_write_indexed(crtc_state, PRE_CSC_GAMC_DATA(pipe),
+				      DISPLAY_VER(display) >= 14 ?
+				      1 << 24 : 1 << 16);
 
 	ilk_lut_write(crtc_state, PRE_CSC_GAMC_INDEX(pipe), 0);
 }
@@ -1687,10 +1708,10 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
 	for (i = 0; i < 9; i++) {
 		const struct drm_color_lut *entry = &lut[i];
 
-		ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
-			      ilk_lut_12p4_ldw(entry));
-		ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
-			      ilk_lut_12p4_udw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
+				      ilk_lut_12p4_ldw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_MULTI_SEG_DATA(pipe),
+				      ilk_lut_12p4_udw(entry));
 	}
 
 	ilk_lut_write(crtc_state, PREC_PAL_MULTI_SEG_INDEX(pipe),
@@ -1726,10 +1747,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 	for (i = 1; i < 257; i++) {
 		entry = &lut[i * 8];
 
-		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
-			      ilk_lut_12p4_ldw(entry));
-		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
-			      ilk_lut_12p4_udw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
+				      ilk_lut_12p4_ldw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
+				      ilk_lut_12p4_udw(entry));
 	}
 
 	/*
@@ -1747,10 +1768,10 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 	for (i = 0; i < 256; i++) {
 		entry = &lut[i * 8 * 128];
 
-		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
-			      ilk_lut_12p4_ldw(entry));
-		ilk_lut_write(crtc_state, PREC_PAL_DATA(pipe),
-			      ilk_lut_12p4_udw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
+				      ilk_lut_12p4_ldw(entry));
+		ilk_lut_write_indexed(crtc_state, PREC_PAL_DATA(pipe),
+				      ilk_lut_12p4_udw(entry));
 	}
 
 	ilk_lut_write(crtc_state, PREC_PAL_INDEX(pipe),
@@ -273,16 +273,20 @@ static bool intel_dsb_prev_ins_is_indexed_write(struct intel_dsb *dsb, i915_reg_
 }
 
 /**
- * intel_dsb_reg_write() - Emit register wriite to the DSB context
+ * intel_dsb_reg_write_indexed() - Emit register wriite to the DSB context
  * @dsb: DSB context
  * @reg: register address.
  * @val: value.
  *
  * This function is used for writing register-value pair in command
  * buffer of DSB.
+ *
+ * Note that indexed writes are slower than normal MMIO writes
+ * for a small number (less than 5 or so) of writes to the same
+ * register.
  */
-void intel_dsb_reg_write(struct intel_dsb *dsb,
-			 i915_reg_t reg, u32 val)
+void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
+				 i915_reg_t reg, u32 val)
 {
 	/*
 	 * For example the buffer will look like below for 3 dwords for auto
@@ -340,6 +344,15 @@ void intel_dsb_reg_write(struct intel_dsb *dsb,
 	}
 }
 
+void intel_dsb_reg_write(struct intel_dsb *dsb,
+			 i915_reg_t reg, u32 val)
+{
+	intel_dsb_emit(dsb, val,
+		       (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) |
+		       (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) |
+		       i915_mmio_reg_offset(reg));
+}
+
 static u32 intel_dsb_mask_to_byte_en(u32 mask)
 {
 	return (!!(mask & 0xff000000) << 3 |
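The kernel-doc above notes that indexed DSB writes only pay off beyond a handful of writes to the same register. A rough standalone illustration of the idea behind auto-increment (indexed) register writes: consecutive writes to the same register share one header and then just append data words. The encoding below is invented for illustration and is not the real DSB instruction format:

#include <stdint.h>
#include <stdio.h>

#define BUF_DWORDS 64

struct cmdbuf {
	uint32_t buf[BUF_DWORDS];
	unsigned int len;	/* dwords used */
	int last_hdr;		/* index of last indexed header, -1 if none */
	uint32_t last_reg;	/* register the last indexed header targets */
};

/* Plain write: one header dword (register offset) plus one data dword. */
static void emit_write(struct cmdbuf *cb, uint32_t reg, uint32_t val)
{
	cb->buf[cb->len++] = reg;
	cb->buf[cb->len++] = val;
	cb->last_hdr = -1;
}

/* Indexed write: reuse the previous header when it targets the same register. */
static void emit_write_indexed(struct cmdbuf *cb, uint32_t reg, uint32_t val)
{
	if (cb->last_hdr < 0 || cb->last_reg != reg) {
		cb->last_hdr = (int)cb->len;
		cb->last_reg = reg;
		cb->buf[cb->len++] = reg;	/* header */
	}
	cb->buf[cb->len++] = val;		/* data word, auto-incremented target */
}

int main(void)
{
	struct cmdbuf a = { .last_hdr = -1 }, b = { .last_hdr = -1 };

	for (uint32_t i = 0; i < 8; i++) {
		emit_write(&a, 0x4a400, i);		/* 2 dwords per write */
		emit_write_indexed(&b, 0x4a400, i);	/* 1 header + 1 dword each */
	}
	printf("plain: %u dwords, indexed: %u dwords\n", a.len, b.len);
	return 0;
}

That amortisation is why the series keeps plain intel_dsb_reg_write() for one-off writes and reserves intel_dsb_reg_write_indexed() for the long LUT data loads in intel_color.c.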
@@ -34,6 +34,8 @@ void intel_dsb_finish(struct intel_dsb *dsb);
 void intel_dsb_cleanup(struct intel_dsb *dsb);
 void intel_dsb_reg_write(struct intel_dsb *dsb,
 			 i915_reg_t reg, u32 val);
+void intel_dsb_reg_write_indexed(struct intel_dsb *dsb,
+				 i915_reg_t reg, u32 val);
 void intel_dsb_reg_write_masked(struct intel_dsb *dsb,
 				i915_reg_t reg, u32 mask, u32 val);
 void intel_dsb_noop(struct intel_dsb *dsb, int count);
@@ -1643,9 +1643,21 @@ capture_engine(struct intel_engine_cs *engine,
 		return NULL;
 
 	intel_engine_get_hung_entity(engine, &ce, &rq);
-	if (rq && !i915_request_started(rq))
-		drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
-			 engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
+	if (rq && !i915_request_started(rq)) {
+		/*
+		 * We want to know also what is the guc_id of the context,
+		 * but if we don't have the context reference, then skip
+		 * printing it.
+		 */
+		if (ce)
+			drm_info(&engine->gt->i915->drm,
+				 "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
+				 engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
+		else
+			drm_info(&engine->gt->i915->drm,
+				 "Got hung context on %s with active request %lld:%lld not yet started\n",
+				 engine->name, rq->fence.context, rq->fence.seqno);
+	}
 
 	if (rq) {
 		capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
@@ -506,6 +506,6 @@ int __init i915_scheduler_module_init(void)
 	return 0;
 
 err_priorities:
-	kmem_cache_destroy(slab_priorities);
+	kmem_cache_destroy(slab_dependencies);
 	return -ENOMEM;
 }
@@ -224,8 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 				   XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				   XE_BO_FLAG_PINNED);
 	if (IS_ERR(tiny)) {
-		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
-			   PTR_ERR(pt));
+		KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
+			   PTR_ERR(tiny));
 		goto free_pt;
 	}
 
@@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe
 	__invalidation_fence_signal(xe, fence);
 }
 
+void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
+{
+	if (WARN_ON_ONCE(!fence->gt))
+		return;
+
+	__invalidation_fence_signal(gt_to_xe(fence->gt), fence);
+}
+
 static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 {
 	struct xe_gt *gt = container_of(work, struct xe_gt,
@@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
 				       struct xe_gt_tlb_invalidation_fence *fence,
 				       bool stack);
+void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence);
 
 static inline void
 xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
@@ -1333,8 +1333,7 @@ static void invalidation_fence_cb(struct dma_fence *fence,
 		queue_work(system_wq, &ifence->work);
 	} else {
 		ifence->base.base.error = ifence->fence->error;
-		dma_fence_signal(&ifence->base.base);
-		dma_fence_put(&ifence->base.base);
+		xe_gt_tlb_invalidation_fence_signal(&ifence->base);
 	}
 	dma_fence_put(ifence->fence);
 }
@@ -27,46 +27,27 @@
 #include "xe_reg_whitelist.h"
 #include "xe_rtp_types.h"
 
-#define XE_REG_SR_GROW_STEP_DEFAULT	16
-
 static void reg_sr_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_reg_sr *sr = arg;
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	xa_for_each(&sr->xa, reg, entry)
+		kfree(entry);
 
 	xa_destroy(&sr->xa);
-	kfree(sr->pool.arr);
-	memset(&sr->pool, 0, sizeof(sr->pool));
 }
 
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
 {
 	xa_init(&sr->xa);
-	memset(&sr->pool, 0, sizeof(sr->pool));
-	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
 	sr->name = name;
 
 	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
 
-static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
-{
-	if (sr->pool.used == sr->pool.allocated) {
-		struct xe_reg_sr_entry *arr;
-
-		arr = krealloc_array(sr->pool.arr,
-				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
-				     sizeof(*arr), GFP_KERNEL);
-		if (!arr)
-			return NULL;
-
-		sr->pool.arr = arr;
-		sr->pool.allocated += sr->pool.grow_step;
-	}
-
-	return &sr->pool.arr[sr->pool.used++];
-}
-
 static bool compatible_entries(const struct xe_reg_sr_entry *e1,
 			       const struct xe_reg_sr_entry *e2)
 {
@@ -112,7 +93,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr,
 		return 0;
 	}
 
-	pentry = alloc_entry(sr);
+	pentry = kmalloc(sizeof(*pentry), GFP_KERNEL);
 	if (!pentry) {
 		ret = -ENOMEM;
 		goto fail;
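The removed pool handed out pointers into an array grown with krealloc_array(); growing such an array can move it and leave previously handed-out entry pointers dangling, and the fix above switches xe_reg_sr_add() to a plain kmalloc() per entry, freed from the xarray in reg_sr_fini(). A small userspace analogue of the grow-step pool being removed, with realloc() standing in for krealloc_array():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define GROW_STEP 2	/* plays the role of the removed XE_REG_SR_GROW_STEP_DEFAULT */

struct entry { int value; };

struct pool {
	struct entry *arr;
	unsigned int used;
	unsigned int allocated;
};

/* Grow-step allocator: growing may move the whole backing array. */
static struct entry *pool_alloc(struct pool *p)
{
	if (p->used == p->allocated) {
		struct entry *arr = realloc(p->arr,
					    (p->allocated + GROW_STEP) * sizeof(*arr));
		if (!arr)
			return NULL;
		p->arr = arr;
		p->allocated += GROW_STEP;
	}
	return &p->arr[p->used++];
}

int main(void)
{
	struct pool p = { 0 };
	struct entry *first = pool_alloc(&p);	/* callers keep pointers like this around */
	uintptr_t base_at_first_alloc;

	if (!first)
		return 1;
	first->value = 1;
	base_at_first_alloc = (uintptr_t)p.arr;

	for (int i = 0; i < 64; i++)		/* keep growing the pool */
		(void)pool_alloc(&p);

	if ((uintptr_t)p.arr != base_at_first_alloc)
		printf("backing array moved: earlier entry pointers are now stale\n");
	else
		printf("array happened not to move this run\n");

	free(p.arr);
	return 0;
}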
@@ -20,12 +20,6 @@ struct xe_reg_sr_entry {
 };
 
 struct xe_reg_sr {
-	struct {
-		struct xe_reg_sr_entry *arr;
-		unsigned int used;
-		unsigned int allocated;
-		unsigned int grow_step;
-	} pool;
 	struct xarray xa;
 	const char *name;
 