amd-drm-fixes-6.12-2024-09-27:

amdgpu:
 - MES 12 fix
 - KFD fence sync fix
 - SR-IOV fixes
 - VCN 4.0.6 fix
 - SDMA 7.x fix
 - Bump driver version to note cleared VRAM support
 - SWSMU fix
 
 amdgpu:
 - CU occupancy logic fix
 - SDMA queue fix
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZvcUAQAKCRC93/aFa7yZ
 2M1SAQDloEHjf+AxBWT5GhrjPCAgmHPhwTCHF4ILZaS2UuIZ+QD/TU2p8cnzn7ZZ
 7eDeMYcZCeY7xI96SmxyAwPBbM70EAY=
 =DAqO
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.12-2024-09-27' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-fixes-6.12-2024-09-27:

amdgpu:
- MES 12 fix
- KFD fence sync fix
- SR-IOV fixes
- VCN 4.0.6 fix
- SDMA 7.x fix
- Bump driver version to note cleared VRAM support
- SWSMU fix

amdgpu:
- CU occupancy logic fix
- SDMA queue fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240927202819.2978109-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2024-09-28 08:42:46 +10:00
commit e7268dd9bb
100 changed files with 1024 additions and 828 deletions

View File

@ -1083,10 +1083,6 @@ struct amdgpu_device {
struct amdgpu_virt virt;
/* link all shadow bo */
struct list_head shadow_list;
struct mutex shadow_list_lock;
/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];

View File

@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
return -EINVAL;
}
/* udpate aca bank to aca source error_cache first */
/* update aca bank to aca source error_cache first */
ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
if (ret)
return ret;

View File

@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
int *wave_cnt, int *vmid, uint32_t inst)
struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;
unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
*vmid = 0xFF;
*wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (wave_cnt != 0) {
queue_cnt->wave_cnt += wave_cnt;
queue_cnt->doorbell_off =
(RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
}
}
/**
@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* or more queues running and submitting waves to compute units.
*
* @adev: Handle of device from which to get number of waves in flight
* @pasid: Identifies the process for which this query call is invoked
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
* belong to process with given pasid
* @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
* for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
* @inst: xcc's instance number on a multi-XCC setup
@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
* If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
* of the wave(s).
* If non-zero waves are in flight, store the corresponding doorbell offset
* of the queue, along with the wave count.
*
* Determine if VMID from above step maps to pasid provided as parameter. If
* it matches agrregate the wave count. That the VMID will not match pasid is
* a normal condition i.e. a device is expected to support multiple queues
* from multiple proceses.
* Determine if the queue belongs to the process by comparing the doorbell
* offset against the process's queues. If it matches, aggregate the wave
* count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
struct kfd_cu_occupancy *cu_occupancy,
int *max_waves_per_cu, uint32_t inst)
{
int qidx;
int vmid;
int se_idx;
int sh_idx;
int se_cnt;
int sh_cnt;
int wave_cnt;
int queue_map;
int pasid_tmp;
int max_queue_cnt;
int vmid_wave_cnt = 0;
DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
lock_spi_csq_mutexes(adev);
soc15_grbm_select(adev, 1, 0, 0, 0, inst);
soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
/*
* Iterate through the shader engines and arrays of the device
@ -1048,51 +1043,38 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
/*
* Assumption: queue map encodes following schema: four
* pipes per each micro-engine, with each pipe mapping
* eight queues. This schema is true for GFX9 devices
* and must be verified for newer device families
/*
* Assumption: queue map encodes following schema: four
* pipes per each micro-engine, with each pipe mapping
* eight queues. This schema is true for GFX9 devices
* and must be verified for newer device families
*/
for (qidx = 0; qidx < max_queue_cnt; qidx++) {
/* Skip qeueus that are not associated with
* compute functions
*/
for (qidx = 0; qidx < max_queue_cnt; qidx++) {
if (!test_bit(qidx, cp_queue_bitmap))
continue;
/* Skip qeueus that are not associated with
* compute functions
*/
if (!test_bit(qidx, cp_queue_bitmap))
continue;
if (!(queue_map & (1 << qidx)))
continue;
if (!(queue_map & (1 << qidx)))
continue;
/* Get number of waves in flight and aggregate them */
get_wave_count(adev, qidx, &wave_cnt, &vmid,
inst);
if (wave_cnt != 0) {
pasid_tmp =
RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
mmIH_VMID_0_LUT) + vmid);
if (pasid_tmp == pasid)
vmid_wave_cnt += wave_cnt;
}
}
/* Get number of waves in flight and aggregate them */
get_wave_count(adev, qidx, &cu_occupancy[qidx],
inst);
}
}
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
soc15_grbm_select(adev, 0, 0, 0, 0, inst);
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);
/* Update the output parameters and return */
*pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}

View File

@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
struct kfd_cu_occupancy *cu_occupancy,
int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);

View File

@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
}
}
ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
ret = amdgpu_bo_pin(bo, domain);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);

View File

@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
* For SR-IOV, the vbios image is also put in VRAM in the VF.
*/
static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
uint8_t __iomem *bios;
resource_size_t vram_base;
@ -284,10 +285,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;
/* ATRM is for the discrete card only */
if (adev->flags & AMD_IS_APU)
return false;
/* ATRM is for on-platform devices only */
if (dev_is_removable(&adev->pdev->dev))
return false;
@ -343,11 +340,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return igp_read_bios_from_vram(adev);
else
return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
false : amdgpu_asic_read_disabled_bios(adev);
return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
false : amdgpu_asic_read_disabled_bios(adev);
}
#ifdef CONFIG_ACPI
@ -414,7 +408,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif
bool amdgpu_get_bios(struct amdgpu_device *adev)
static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
{
if (amdgpu_acpi_vfct_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
goto success;
}
if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
if (amdgpu_read_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
goto success;
}
if (amdgpu_read_platform_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from platform\n");
goto success;
}
dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
return false;
success:
return true;
}
static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
@ -426,7 +449,8 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}
if (igp_read_bios_from_vram(adev)) {
/* this is required for SR-IOV */
if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
@ -455,10 +479,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
return false;
success:
adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
return true;
}
bool amdgpu_get_bios(struct amdgpu_device *adev)
{
bool found;
if (adev->flags & AMD_IS_APU)
found = amdgpu_get_bios_apu(adev);
else
found = amdgpu_get_bios_dgpu(adev);
if (found)
adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
return found;
}
/* helper function for soc15 and onwards to read bios from rom */
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)

View File

@ -4107,9 +4107,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->mm_stats.lock);
spin_lock_init(&adev->wb.lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list);
@ -5029,80 +5026,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
return 0;
}
/**
* amdgpu_device_recover_vram - Recover some VRAM contents
*
* @adev: amdgpu_device pointer
*
* Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
*
* Returns:
* 0 on success, negative error code on failure.
*/
static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
struct dma_fence *fence = NULL, *next = NULL;
struct amdgpu_bo *shadow;
struct amdgpu_bo_vm *vmbo;
long r = 1, tmo;
if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(8000);
else
tmo = msecs_to_jiffies(100);
dev_info(adev->dev, "recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock);
list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
/* If vm is compute context or adev is APU, shadow will be NULL */
if (!vmbo->shadow)
continue;
shadow = vmbo->shadow;
/* No need to recover an evicted BO */
if (!shadow->tbo.resource ||
shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
continue;
r = amdgpu_bo_restore_shadow(shadow, &next);
if (r)
break;
if (fence) {
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
fence = next;
if (tmo == 0) {
r = -ETIMEDOUT;
break;
} else if (tmo < 0) {
r = tmo;
break;
}
} else {
fence = next;
}
}
mutex_unlock(&adev->shadow_list_lock);
if (fence)
tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
if (r < 0 || tmo <= 0) {
dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
return -EIO;
}
dev_info(adev->dev, "recover vram bo from shadow done\n");
return 0;
}
/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
@ -5165,12 +5088,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
if (r)
return r;
/* need to be called during full access so we can't do it later like
* bare-metal does.
@ -5569,9 +5488,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
}
}
if (!r)
r = amdgpu_device_recover_vram(tmp_adev);
else
if (r)
tmp_adev->asic_reset_res = r;
}
@ -6189,7 +6106,7 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
p2p_addressable = !(adev->gmc.aper_base & address_mask ||
aper_limit & address_mask);
}
return is_large_bar && p2p_access && p2p_addressable;
return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
#else
return false;
#endif

View File

@ -233,6 +233,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
}
if (!adev->enable_virtual_display) {
new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(new_abo,
amdgpu_display_supported_domains(adev, new_abo->flags));
if (unlikely(r != 0)) {
@ -1474,7 +1475,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
connector->display_info.is_hdmi &&
connector && connector->display_info.is_hdmi &&
amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
@ -1759,6 +1760,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);

View File

@ -117,9 +117,10 @@
* - 3.56.0 - Update IB start address and size alignment for decode and encode
* - 3.57.0 - Compute tunneling on GFX10+
* - 3.58.0 - Add GFX12 DCC support
* - 3.59.0 - Cleared VRAM
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 58
#define KMS_DRIVER_MINOR 59
#define KMS_DRIVER_PATCHLEVEL 0
/*

View File

@ -43,8 +43,6 @@
#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
@ -87,11 +85,11 @@ static const struct vm_operations_struct amdgpu_gem_vm_ops = {
static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
if (robj) {
amdgpu_hmm_unregister(robj);
amdgpu_bo_unref(&robj);
if (aobj) {
amdgpu_hmm_unregister(aobj);
ttm_bo_put(&aobj->tbo);
}
}
@ -126,7 +124,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
bo = &ubo->bo;
*obj = &bo->tbo.base;
(*obj)->funcs = &amdgpu_gem_object_funcs;
return 0;
}
@ -295,7 +292,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
return drm_gem_ttm_mmap(obj, vma);
}
static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
.free = amdgpu_gem_object_free,
.open = amdgpu_gem_object_open,
.close = amdgpu_gem_object_close,

View File

@ -33,6 +33,8 @@
#define AMDGPU_GEM_DOMAIN_MAX 0x3
#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
/*

View File

@ -107,8 +107,11 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
/*
* Do the coredump immediately after a job timeout to get a very
* close dump/snapshot/representation of GPU's current error status
* Skip it for SRIOV, since VF FLR will be triggered by host driver
* before job timeout
*/
amdgpu_job_core_dump(adev, job);
if (!amdgpu_sriov_vf(adev))
amdgpu_job_core_dump(adev, job);
if (amdgpu_gpu_recovery &&
amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {

View File

@ -77,24 +77,6 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
amdgpu_bo_destroy(tbo);
}
static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo;
struct amdgpu_bo_vm *vmbo;
bo = shadow_bo->parent;
vmbo = to_amdgpu_bo_vm(bo);
/* in case amdgpu_device_recover_vram got NULL of bo->parent */
if (!list_empty(&vmbo->shadow_list)) {
mutex_lock(&adev->shadow_list_lock);
list_del_init(&vmbo->shadow_list);
mutex_unlock(&adev->shadow_list_lock);
}
amdgpu_bo_destroy(tbo);
}
/**
* amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
* @bo: buffer object to be checked
@ -108,8 +90,7 @@ static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo)
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_bo_destroy ||
bo->destroy == &amdgpu_bo_user_destroy ||
bo->destroy == &amdgpu_bo_vm_destroy)
bo->destroy == &amdgpu_bo_user_destroy)
return true;
return false;
@ -583,6 +564,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (bo == NULL)
return -ENOMEM;
drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
@ -722,52 +704,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_add_to_shadow_list - add a BO to the shadow list
*
* @vmbo: BO that will be inserted into the shadow list
*
* Insert a BO to the shadow list.
*/
void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(vmbo->bo.tbo.bdev);
mutex_lock(&adev->shadow_list_lock);
list_add_tail(&vmbo->shadow_list, &adev->shadow_list);
vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo);
vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy;
mutex_unlock(&adev->shadow_list_lock);
}
/**
* amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
*
* @shadow: &amdgpu_bo shadow to be restored
* @fence: dma_fence associated with the operation
*
* Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu
* reset where vram context may be lost.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
uint64_t shadow_addr, parent_addr;
shadow_addr = amdgpu_bo_gpu_offset(shadow);
parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
true, false, 0);
}
/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
@ -851,7 +787,7 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
if (bo == NULL)
return NULL;
ttm_bo_get(&bo->tbo);
drm_gem_object_get(&bo->tbo.base);
return bo;
}
@ -863,40 +799,30 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
struct ttm_buffer_object *tbo;
if ((*bo) == NULL)
return;
tbo = &((*bo)->tbo);
ttm_bo_put(tbo);
drm_gem_object_put(&(*bo)->tbo.base);
*bo = NULL;
}
/**
* amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
* amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
* @min_offset: the start of requested address range
* @max_offset: the end of requested address range
*
* Pins the buffer object according to requested domain and address range. If
* the memory is unbound gart memory, binds the pages into gart table. Adjusts
* pin_count and pin_size accordingly.
* Pins the buffer object according to requested domain. If the memory is
* unbound gart memory, binds the pages into gart table. Adjusts pin_count and
* pin_size accordingly.
*
* Pinning means to lock pages in memory along with keeping them at a fixed
* offset. It is required when a buffer can not be moved, for example, when
* a display buffer is being scanned out.
*
* Compared with amdgpu_bo_pin(), this function gives more flexibility on
* where to pin a buffer if there are specific restrictions on where a buffer
* must be located.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 min_offset, u64 max_offset)
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@ -905,9 +831,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
if (WARN_ON_ONCE(min_offset > max_offset))
return -EINVAL;
/* Check domain to be pinned to against preferred domains */
if (bo->preferred_domains & domain)
domain = bo->preferred_domains & domain;
@ -933,14 +856,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
ttm_bo_pin(&bo->tbo);
if (max_offset != 0) {
u64 domain_start = amdgpu_ttm_domain_start(adev,
mem_type);
WARN_ON_ONCE(max_offset <
(amdgpu_bo_gpu_offset(bo) - domain_start));
}
return 0;
}
@ -957,17 +872,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
unsigned int fpfn, lpfn;
fpfn = min_offset >> PAGE_SHIFT;
lpfn = max_offset >> PAGE_SHIFT;
if (fpfn > bo->placements[i].fpfn)
bo->placements[i].fpfn = fpfn;
if (!bo->placements[i].lpfn ||
(lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
bo->placements[i].mem_type == TTM_PL_VRAM)
bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
@ -993,24 +897,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return r;
}
/**
* amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
*
* A simple wrapper to amdgpu_bo_pin_restricted().
* Provides a simpler API for buffers that do not have any strict restrictions
* on where a buffer must be located.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
}
/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned

View File

@ -136,8 +136,6 @@ struct amdgpu_bo_user {
struct amdgpu_bo_vm {
struct amdgpu_bo bo;
struct amdgpu_bo *shadow;
struct list_head shadow_list;
struct amdgpu_vm_bo_base entries[];
};
@ -275,22 +273,6 @@ static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
}
/**
* amdgpu_bo_shadowed - check if the BO is shadowed
*
* @bo: BO to be tested.
*
* Returns:
* NULL if not shadowed or else return a BO pointer.
*/
static inline struct amdgpu_bo *amdgpu_bo_shadowed(struct amdgpu_bo *bo)
{
if (bo->tbo.type == ttm_bo_type_kernel)
return to_amdgpu_bo_vm(bo)->shadow;
return NULL;
}
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
@ -322,8 +304,6 @@ void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 min_offset, u64 max_offset);
void amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
@ -349,9 +329,6 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats);
void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
struct dma_fence **fence);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);

View File

@ -2853,7 +2853,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp)
if (ret)
return ret;
/* Start rlc autoload after psp recieved all the gfx firmware */
/* Start rlc autoload after psp received all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload_start(psp);
@ -3425,9 +3425,11 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
int err = 0;
const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
int fw_index, fw_bin_count, start_index = 0;
const struct psp_fw_bin_desc *fw_bin;
uint8_t *ucode_array_start_addr;
int fw_index = 0;
int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
if (err)
@ -3478,15 +3480,30 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
case 2:
sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
if (le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
err = -EINVAL;
goto out;
}
for (fw_index = 0; fw_index < le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count); fw_index++) {
err = parse_sos_bin_descriptor(psp,
&sos_hdr_v2_0->psp_fw_bin[fw_index],
if (sos_hdr_v2_0->header.header_version_minor == 1) {
sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
fw_bin = sos_hdr_v2_1->psp_fw_bin;
if (psp_is_aux_sos_load_required(psp))
start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
else
fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
} else {
fw_bin = sos_hdr_v2_0->psp_fw_bin;
}
for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
sos_hdr_v2_0);
if (err)
goto out;

View File

@ -138,6 +138,7 @@ struct psp_funcs {
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
bool (*get_ras_capability)(struct psp_context *psp);
bool (*is_aux_sos_load_required)(struct psp_context *psp);
};
struct ta_funcs {
@ -464,6 +465,9 @@ struct amdgpu_psp_funcs {
((psp)->funcs->fatal_error_recovery_quirk ? \
(psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
#define psp_is_aux_sos_load_required(psp) \
((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;

View File

@ -882,7 +882,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
if (ret)
return ret;
/* gfx block ras dsiable cmd must send to ras-ta */
/* gfx block ras disable cmd must send to ras-ta */
if (head->block == AMDGPU_RAS_BLOCK__GFX)
con->features |= BIT(head->block);
@ -3468,6 +3468,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
/* aca is disabled by default */
adev->aca.is_enabled = false;
/* bad page feature is not applicable to specific app platform */
if (adev->gmc.is_app_apu &&
amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
amdgpu_bad_page_threshold = 0;
}
static void amdgpu_ras_counte_dw(struct work_struct *work)

View File

@ -58,7 +58,7 @@
#define EEPROM_I2C_MADDR_4 0x40000
/*
* The 2 macros bellow represent the actual size in bytes that
* The 2 macros below represent the actual size in bytes that
* those entities occupy in the EEPROM memory.
* RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
* uses uint64 to store 6b fields such as retired_page.

View File

@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
/**
* amdgpu_sync_kfd - sync to KFD fences
*
* @sync: sync object to add KFD fences to
* @resv: reservation object with KFD fences
*
* Extract all KFD fences and add them to the sync object.
*/
int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
{
struct dma_resv_iter cursor;
struct dma_fence *f;
int r = 0;
dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
dma_resv_for_each_fence_unlocked(&cursor, f) {
void *fence_owner = amdgpu_sync_get_owner(f);
if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
continue;
r = amdgpu_sync_fence(sync, f);
if (r)
break;
}
dma_resv_iter_end(&cursor);
return r;
}
/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{

View File

@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);

View File

@ -1970,7 +1970,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
/* Initiailize doorbell pool on PCI BAR */
/* Initialize doorbell pool on PCI BAR */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
if (r) {
DRM_ERROR("Failed initializing doorbell heap.\n");

View File

@ -136,6 +136,14 @@ struct psp_firmware_header_v2_0 {
struct psp_fw_bin_desc psp_fw_bin[];
};
/* version_major=2, version_minor=1 */
struct psp_firmware_header_v2_1 {
struct common_firmware_header header;
uint32_t psp_fw_bin_count;
uint32_t psp_aux_fw_bin_index;
struct psp_fw_bin_desc psp_fw_bin[];
};
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@ -426,6 +434,7 @@ union amdgpu_firmware_header {
struct psp_firmware_header_v1_1 psp_v1_1;
struct psp_firmware_header_v1_3 psp_v1_3;
struct psp_firmware_header_v2_0 psp_v2_0;
struct psp_firmware_header_v2_0 psp_v2_1;
struct ta_firmware_header_v1_0 ta;
struct ta_firmware_header_v2_0 ta_v2_0;
struct gfx_firmware_header_v1_0 gfx;
@ -447,7 +456,7 @@ union amdgpu_firmware_header {
uint8_t raw[0x100];
};
#define UCODE_MAX_PSP_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc))
#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
/*
* fw loading support

View File

@ -338,6 +338,7 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)

View File

@ -465,7 +465,6 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
{
uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *shadow;
struct amdgpu_bo *bo;
int r;
@ -486,16 +485,10 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_unlock(&vm->status_lock);
bo = bo_base->bo;
shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo);
if (r)
return r;
if (shadow) {
r = validate(param, shadow);
if (r)
return r;
}
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@ -1176,6 +1169,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
AMDGPU_SYNC_EQ_OWNER, vm);
if (r)
goto error_free;
if (bo) {
r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
if (r)
goto error_free;
}
} else {
struct drm_gem_object *obj = &bo->tbo.base;
@ -2149,10 +2148,6 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
{
struct amdgpu_vm_bo_base *bo_base;
/* shadow bo doesn't have bo base, its validation needs its parent */
if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo))
bo = bo->parent;
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
@ -2482,7 +2477,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
root_bo = amdgpu_bo_ref(&root->bo);
r = amdgpu_bo_reserve(root_bo, true);
if (r) {
amdgpu_bo_unref(&root->shadow);
amdgpu_bo_unref(&root_bo);
goto error_free_delayed;
}
@ -2575,11 +2569,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->last_update = dma_fence_get_stub();
vm->is_compute_context = true;
/* Free the shadow bo for compute VM */
amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
goto unreserve_bo;
unreserve_bo:
amdgpu_bo_unreserve(vm->root.bo);
return r;

View File

@ -383,14 +383,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
return r;
if (vmbo->shadow) {
struct amdgpu_bo *shadow = vmbo->shadow;
r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
if (r)
return r;
}
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
@ -448,10 +440,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int32_t xcp_id)
{
struct amdgpu_bo_param bp;
struct amdgpu_bo *bo;
struct dma_resv *resv;
unsigned int num_entries;
int r;
memset(&bp, 0, sizeof(bp));
@ -484,42 +473,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
r = amdgpu_bo_create_vm(adev, &bp, vmbo);
if (r)
return r;
bo = &(*vmbo)->bo;
if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
(*vmbo)->shadow = NULL;
return 0;
}
if (!bp.resv)
WARN_ON(dma_resv_lock(bo->tbo.base.resv,
NULL));
resv = bp.resv;
memset(&bp, 0, sizeof(bp));
bp.size = amdgpu_vm_pt_size(adev, level);
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
bp.xcp_id_plus1 = xcp_id + 1;
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
if (!resv)
dma_resv_unlock(bo->tbo.base.resv);
if (r) {
amdgpu_bo_unref(&bo);
return r;
}
amdgpu_bo_add_to_shadow_list(*vmbo);
return 0;
return amdgpu_bo_create_vm(adev, &bp, vmbo);
}
/**
@ -569,7 +523,6 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return 0;
error_free_pt:
amdgpu_bo_unref(&pt->shadow);
amdgpu_bo_unref(&pt_bo);
return r;
}
@ -581,17 +534,10 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
*/
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
struct amdgpu_bo *shadow;
if (!entry->bo)
return;
entry->bo->vm_bo = NULL;
shadow = amdgpu_bo_shadowed(entry->bo);
if (shadow) {
ttm_bo_set_bulk_move(&shadow->tbo, NULL);
amdgpu_bo_unref(&shadow);
}
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
spin_lock(&entry->vm->status_lock);

View File

@ -35,16 +35,7 @@
*/
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
int r;
r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
if (r)
return r;
if (table->shadow)
r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);
return r;
return amdgpu_ttm_alloc_gart(&table->bo.tbo);
}
/* Allocate a new job for @count PTE updates */
@ -265,17 +256,13 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
if (!p->pages_addr) {
/* set page commands needed */
if (vmbo->shadow)
amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
count, incr, flags);
amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
incr, flags);
return 0;
}
/* copy commands needed */
ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
(vmbo->shadow ? 2 : 1);
ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
/* for padding */
ndw -= 7;
@ -290,8 +277,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
pte[i] |= flags;
}
if (vmbo->shadow)
amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
pe += nptes * 8;

View File

@ -180,6 +180,6 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
#define for_each_xcp(xcp_mgr, xcp, i) \
for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
#endif

View File

@ -94,8 +94,6 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
if (aqua_vanjaram_xcp_vcn_shared(adev))
inst_mask = 1 << (inst_idx * 2);
break;
default:
DRM_ERROR("Not support ring type %d!", ring->funcs->type);
@ -105,6 +103,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
ring->xcp_id = xcp_id;
dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
ring->xcp_id);
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id;
break;
@ -394,38 +394,31 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
struct amdgpu_xcp_ip *ip)
{
struct amdgpu_device *adev = xcp_mgr->adev;
int num_sdma, num_vcn, num_shared_vcn, num_xcp;
int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
int num_sdma, num_vcn;
num_sdma = adev->sdma.num_instances;
num_vcn = adev->vcn.num_vcn_inst;
num_shared_vcn = 1;
num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
switch (xcp_mgr->mode) {
case AMDGPU_SPX_PARTITION_MODE:
num_sdma_xcp = num_sdma;
num_vcn_xcp = num_vcn;
break;
case AMDGPU_DPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 2;
num_vcn_xcp = num_vcn / 2;
break;
case AMDGPU_TPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 3;
num_vcn_xcp = num_vcn / 3;
break;
case AMDGPU_QPX_PARTITION_MODE:
num_sdma_xcp = num_sdma / 4;
num_vcn_xcp = num_vcn / 4;
break;
case AMDGPU_CPX_PARTITION_MODE:
num_sdma_xcp = 2;
num_vcn_xcp = num_vcn ? 1 : 0;
num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
break;
default:
return -EINVAL;
}
num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
if (num_vcn && num_xcp > num_vcn)
num_shared_vcn = num_xcp / num_vcn;
switch (ip_id) {
case AMDGPU_XCP_GFXHUB:
@ -441,7 +434,8 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
break;
case AMDGPU_XCP_VCN:
ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id);
ip->inst_mask =
XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
/* TODO : Assign IP funcs */
break;
default:

View File

@ -1881,6 +1881,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@ -2401,6 +2402,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {

View File

@ -1931,6 +1931,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@ -2485,6 +2486,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {

View File

@ -1861,6 +1861,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@ -2321,6 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {

View File

@ -1828,6 +1828,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
return r;
if (!atomic) {
abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
@ -2320,6 +2321,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {

View File

@ -202,12 +202,16 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};
static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
};
static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
};
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
@ -3495,10 +3499,14 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
soc15_program_register_sequence(adev,
golden_settings_gc_12_0,
(const u32)ARRAY_SIZE(golden_settings_gc_12_0));
if (adev->rev_id == 0)
soc15_program_register_sequence(adev,
golden_settings_gc_12_0,
(const u32)ARRAY_SIZE(golden_settings_gc_12_0));
golden_settings_gc_12_0_rev0,
(const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
break;
default:
break;

View File

@ -1701,7 +1701,15 @@ static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev,
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
(CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
CP_MEC_CNTL__MEC_ME1_HALT_MASK |
CP_MEC_CNTL__MEC_ME2_HALT_MASK));
adev->gfx.kiq[xcc_id].ring.sched.ready = false;
}
udelay(50);
@ -2240,6 +2248,8 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id);
if (r)
return r;
} else {
gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id);
@ -2299,12 +2309,6 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
return 0;
}
static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable,
int xcc_id)
{
gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id);
}
static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
{
if (amdgpu_gfx_disable_kcq(adev, xcc_id))
@ -2336,7 +2340,7 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
}
gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id);
gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id);
gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
static int gfx_v9_4_3_hw_init(void *handle)

View File

@ -153,7 +153,7 @@ static void imu_v11_0_setup(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
}
//disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB
//disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x10007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);

View File

@ -161,7 +161,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long timeout = 3000000; /* 3000 ms */
signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring[0];
struct MES_API_STATUS *api_status;

View File

@ -146,7 +146,7 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
int api_status_off)
{
union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long timeout = 3000000; /* 3000 ms */
signed long timeout = 2100000; /* 2100 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring[pipe];
spinlock_t *ring_lock = &mes->ring_lock[pipe];
@ -479,6 +479,11 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
union MESAPI__MISC misc_pkt;
int pipe;
if (mes->adev->enable_uni_mes)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
memset(&misc_pkt, 0, sizeof(misc_pkt));
misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
@ -513,6 +518,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
break;
case MES_MISC_OP_SET_SHADER_DEBUGGER:
pipe = AMDGPU_MES_SCHED_PIPE;
misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
misc_pkt.set_shader_debugger.process_context_addr =
input->set_shader_debugger.process_context_addr;
@ -530,11 +536,6 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
return -EINVAL;
}
if (mes->adev->enable_uni_mes)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&misc_pkt, sizeof(misc_pkt),
offsetof(union MESAPI__MISC, api_status));
@ -608,6 +609,7 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
/*
* Keep oversubscribe timer for sdma . When we have unmapped doorbell

View File

@ -365,7 +365,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev,
data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
} else {
/* Disbale ASPM L1 */
/* Disable ASPM L1 */
data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
/* Disable ASPM TxL0s */
data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;

View File

@ -81,6 +81,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* memory training timeout define */
#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
#define regMP1_PUB_SCRATCH0 0x3b10090
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@ -807,6 +809,20 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
}
}
static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
u32 pmfw_ver;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
return false;
/* load 4e version of sos if pmfw version less than 85.115.0 */
pmfw_ver = RREG32(regMP1_PUB_SCRATCH0 / 4);
return (pmfw_ver < 0x557300);
}
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@ -830,6 +846,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
.get_ras_capability = psp_v13_0_get_ras_capability,
.is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)

View File

@ -710,7 +710,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
if (ring->use_pollmem) {
/*wptr polling is not enogh fast, directly clean the wptr register */
/*wptr polling is not enough fast, directly clean the wptr register */
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,

View File

@ -1080,13 +1080,16 @@ static void sdma_v7_0_vm_copy_pte(struct amdgpu_ib *ib,
unsigned bytes = count * 8;
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
SDMA_PKT_COPY_LINEAR_HEADER_CPV(1);
ib->ptr[ib->length_dw++] = bytes - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src);
ib->ptr[ib->length_dw++] = upper_32_bits(src);
ib->ptr[ib->length_dw++] = lower_32_bits(pe);
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = 0;
}
@ -1744,7 +1747,7 @@ static void sdma_v7_0_set_buffer_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_vm_pte_funcs sdma_v7_0_vm_pte_funcs = {
.copy_pte_num_dw = 7,
.copy_pte_num_dw = 8,
.copy_pte = sdma_v7_0_vm_copy_pte,
.write_pte = sdma_v7_0_vm_write_pte,
.set_pte_pde = sdma_v7_0_vm_set_pte_pde,

View File

@ -60,7 +60,7 @@ static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *f
{
u32 data;
/* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */
/* CGTT_ROM_CLK_CTRL0 is not available for APUs */
if (adev->flags & AMD_IS_APU)
return;

View File

@ -250,13 +250,6 @@ static void soc24_program_aspm(struct amdgpu_device *adev)
adev->nbio.funcs->program_aspm(adev);
}
static void soc24_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
}
const struct amdgpu_ip_block_version soc24_common_ip_block = {
.type = AMD_IP_BLOCK_TYPE_COMMON,
.major = 1,
@ -454,6 +447,11 @@ static int soc24_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_get_irq(adev);
/* Enable selfring doorbell aperture late because doorbell BAR
* aperture will change if resize BAR successfully in gmc sw_init.
*/
adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, true);
return 0;
}
@ -491,7 +489,7 @@ static int soc24_common_hw_init(void *handle)
adev->df.funcs->hw_init(adev);
/* enable the doorbell aperture */
soc24_enable_doorbell_aperture(adev, true);
adev->nbio.funcs->enable_doorbell_aperture(adev, true);
return 0;
}
@ -500,8 +498,13 @@ static int soc24_common_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* disable the doorbell aperture */
soc24_enable_doorbell_aperture(adev, false);
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc21_enable_doorbell_aperture
* has been removed and there is no need to delay disabling
* selfring doorbell.
*/
adev->nbio.funcs->enable_doorbell_aperture(adev, false);
adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_put_irq(adev);

View File

@ -1395,170 +1395,6 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p,
struct amdgpu_job *job)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
if (atomic_read(&job->base.entity->fence_seq))
return -EINVAL;
/* if VCN0 is harvested, we can't support AV1 */
if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
return 0;
}
static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
uint32_t *msg, num_buffers;
struct amdgpu_bo *bo;
uint64_t start, end;
unsigned int i;
void *ptr;
int r;
addr &= AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
if (r) {
DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
start = map->start * AMDGPU_GPU_PAGE_SIZE;
end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
if (addr & 0x7) {
DRM_ERROR("VCN messages must be 8 byte aligned!\n");
return -EINVAL;
}
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r) {
DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
return r;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
return r;
}
msg = ptr + addr - start;
/* Check length */
if (msg[1] > end - addr) {
r = -EINVAL;
goto out;
}
if (msg[3] != RDECODE_MSG_CREATE)
goto out;
num_buffers = msg[2];
for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
uint32_t offset, size, *create;
if (msg[0] != RDECODE_MESSAGE_CREATE)
continue;
offset = msg[1];
size = msg[2];
if (offset + size > end) {
r = -EINVAL;
goto out;
}
create = ptr + addr + offset - start;
/* H264, HEVC and VP9 can run on any instance */
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
r = vcn_v4_0_5_limit_sched(p, job);
if (r)
goto out;
}
out:
amdgpu_bo_kunmap(bo);
return r;
}
#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
#define RADEON_VCN_ENGINE_INFO (0x30000001)
#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
#define RENCODE_ENCODE_STANDARD_AV1 2
#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
/* return the offset in ib if id is found, -1 otherwise
* to speed up the searching we only search upto max_offset
*/
static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
{
int i;
for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
if (ib->ptr[i + 1] == id)
return i;
}
return -1;
}
static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ring *ring = amdgpu_job_ring(job);
struct amdgpu_vcn_decode_buffer *decode_buffer;
uint64_t addr;
uint32_t val;
int idx;
/* The first instance can decode anything */
if (!ring->me)
return 0;
/* RADEON_VCN_ENGINE_INFO is at the top of ib block */
idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
if (idx < 0) /* engine info is missing */
return 0;
val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
if (!(decode_buffer->valid_buf_flag & 0x1))
return 0;
addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
decode_buffer->msg_buffer_address_lo;
return vcn_v4_0_5_dec_msg(p, job, addr);
} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
return vcn_v4_0_5_limit_sched(p, job);
}
return 0;
}
static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@ -1566,7 +1402,6 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
.patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +

View File

@ -3540,6 +3540,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
return debug_map_and_unlock(dqm);
}
bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
int doorbell_off, u32 *queue_format)
{
struct queue *q;
bool r = false;
if (!queue_format)
return r;
dqm_lock(dqm);
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.doorbell_off == doorbell_off) {
*queue_format = q->properties.format;
r = true;
goto out;
}
}
out:
dqm_unlock(dqm);
return r;
}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,

View File

@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm);
int debug_map_and_unlock(struct device_queue_manager *dqm);
int debug_refresh_runlist(struct device_queue_manager *dqm);
bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{

View File

@ -306,23 +306,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_UTCL2) {
struct kfd_vm_fault_info info = {0};
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry);
int hub_inst = 0;
struct kfd_hsa_memory_exception_data exception_data;
/* gfxhub */
if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) {
hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev,
node_id);
if (hub_inst < 0)
hub_inst = 0;
}
/* mmhub */
if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC)
hub_inst = node_id / 4;
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |

View File

@ -341,6 +341,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdmax_rlcx_doorbell_offset =
q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
<< SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
& SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;

View File

@ -270,6 +270,11 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
int i;
struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES];
u32 queue_format;
memset(cu_occupancy, 0x0, sizeof(cu_occupancy));
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@ -287,8 +292,29 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
&max_waves_per_cu, 0);
/*
* For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
* For AQL queues, because of cooperative dispatch we multiply the wave count
* by number of XCCs in the partition to get the total wave counts across all
* XCCs in the partition.
* For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is.
*/
dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
&max_waves_per_cu, ffs(dev->xcc_mask) - 1);
for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
if (cu_occupancy[i].wave_cnt != 0 &&
kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
cu_occupancy[i].doorbell_off,
&queue_format)) {
if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
wave_cnt += cu_occupancy[i].wave_cnt;
else
wave_cnt += (NUM_XCC(dev->xcc_mask) *
cu_occupancy[i].wave_cnt);
}
}
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;

View File

@ -517,7 +517,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval)
goto err_destroy_queue;
kfd_procfs_del_queue(pqn->q);
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
@ -527,6 +526,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME)
goto err_destroy_queue;
}
kfd_procfs_del_queue(pqn->q);
kfd_queue_release_buffers(pdd, &pqn->q->properties);
pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);

View File

@ -807,6 +807,20 @@ static void dmub_hpd_callback(struct amdgpu_device *adev,
}
}
/**
* dmub_hpd_sense_callback - DMUB HPD sense processing callback.
* @adev: amdgpu_device pointer
* @notify: dmub notification structure
*
* HPD sense changes can occur during low power states and need to be
* notified from firmware to driver.
*/
static void dmub_hpd_sense_callback(struct amdgpu_device *adev,
struct dmub_notification *notify)
{
DRM_DEBUG_DRIVER("DMUB HPD SENSE callback.\n");
}
/**
* register_dmub_notify_callback - Sets callback for DMUB notify
* @adev: amdgpu_device pointer
@ -1757,25 +1771,41 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *
static enum dmub_ips_disable_type dm_get_default_ips_mode(
struct amdgpu_device *adev)
{
/*
* On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
* cause a hard hang. A fix exists for newer PMFW.
*
* As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
* IPS state in all cases, except for s0ix and all displays off (DPMS),
* where IPS2 is allowed.
*
* When checking pmfw version, use the major and minor only.
*/
if (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(3, 5, 0) &&
(adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
return DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
enum dmub_ips_disable_type ret = DMUB_IPS_ENABLE;
if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
return DMUB_IPS_ENABLE;
switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
case IP_VERSION(3, 5, 0):
/*
* On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to
* cause a hard hang. A fix exists for newer PMFW.
*
* As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest
* IPS state in all cases, except for s0ix and all displays off (DPMS),
* where IPS2 is allowed.
*
* When checking pmfw version, use the major and minor only.
*/
if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300)
ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0))
/*
* Other ASICs with DCN35 that have residency issues with
* IPS2 in idle.
* We want them to use IPS2 only in display off cases.
*/
ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
break;
case IP_VERSION(3, 5, 1):
ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
break;
default:
/* ASICs older than DCN35 do not have IPSs */
if (amdgpu_ip_version(adev, DCE_HWIP, 0) < IP_VERSION(3, 5, 0))
ret = DMUB_IPS_DISABLE_ALL;
break;
}
/* ASICs older than DCN35 do not have IPSs */
return DMUB_IPS_DISABLE_ALL;
return ret;
}
static int amdgpu_dm_init(struct amdgpu_device *adev)
@ -3808,6 +3838,12 @@ static int register_hpd_handlers(struct amdgpu_device *adev)
DRM_ERROR("amdgpu: fail to register dmub hpd callback");
return -EINVAL;
}
if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_SENSE_NOTIFY,
dmub_hpd_sense_callback, true)) {
DRM_ERROR("amdgpu: fail to register dmub hpd sense callback");
return -EINVAL;
}
}
list_for_each_entry(connector,
@ -4449,6 +4485,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev)
#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
#define AMDGPU_DM_MIN_SPREAD ((AMDGPU_DM_DEFAULT_MAX_BACKLIGHT - AMDGPU_DM_DEFAULT_MIN_BACKLIGHT) / 2)
#define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
@ -4463,6 +4500,21 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
return;
amdgpu_acpi_get_backlight_caps(&caps);
/* validate the firmware value is sane */
if (caps.caps_valid) {
int spread = caps.max_input_signal - caps.min_input_signal;
if (caps.max_input_signal > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
caps.min_input_signal < 0 ||
spread > AMDGPU_DM_DEFAULT_MAX_BACKLIGHT ||
spread < AMDGPU_DM_MIN_SPREAD) {
DRM_DEBUG_KMS("DM: Invalid backlight caps: min=%d, max=%d\n",
caps.min_input_signal, caps.max_input_signal);
caps.caps_valid = false;
}
}
if (caps.caps_valid) {
dm->backlight_caps[bl_idx].caps_valid = true;
if (caps.aux_support)

View File

@ -50,7 +50,7 @@
#define AMDGPU_DM_MAX_NUM_EDP 2
#define AMDGPU_DMUB_NOTIFICATION_MAX 6
#define AMDGPU_DMUB_NOTIFICATION_MAX 7
#define HDMI_AMD_VENDOR_SPECIFIC_DATA_BLOCK_IEEE_REGISTRATION_ID 0x00001A
#define AMD_VSDB_VERSION_3_FEATURECAP_REPLAYMODE 0x40

View File

@ -1147,7 +1147,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
params[count].num_slices_v = aconnector->dsc_settings.dsc_num_slices_v;
params[count].bpp_overwrite = aconnector->dsc_settings.dsc_bits_per_pixel;
params[count].compression_possible = stream->sink->dsc_caps.dsc_dec_caps.is_dsc_supported;
dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy);
dc_dsc_get_policy_for_timing(params[count].timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
if (!dc_dsc_compute_bandwidth_range(
stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
@ -1681,7 +1681,7 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
{
struct dc_dsc_policy dsc_policy = {0};
dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy);
dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy, dc_link_get_highest_encoding_format(stream->link));
dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0],
stream->sink->ctx->dc->debug.dsc_min_slice_height_override,
dsc_policy.min_target_bpp * 16,

View File

@ -961,6 +961,7 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
else
domain = AMDGPU_GEM_DOMAIN_VRAM;
rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)

View File

@ -114,6 +114,7 @@ static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector
domain = amdgpu_display_supported_domains(adev, rbo->flags);
rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
r = amdgpu_bo_pin(rbo, domain);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)

View File

@ -569,7 +569,7 @@ static void calculate_bandwidth(
break;
}
data->lb_partitions[i] = bw_floor2(bw_div(data->lb_size_per_component[i], data->lb_line_pitch), bw_int_to_fixed(1));
/*clamp the partitions to the maxium number supported by the lb*/
/* clamp the partitions to the maximum number supported by the lb */
if ((surface_type[i] != bw_def_graphics || dceip->graphics_lb_nodownscaling_multi_line_prefetching == 1)) {
data->lb_partitions_max[i] = bw_int_to_fixed(10);
}

View File

@ -59,6 +59,7 @@ int clk_mgr_helper_get_active_display_cnt(
display_count = 0;
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
const struct dc_stream_status *stream_status = &context->stream_status[i];
/* Don't count SubVP phantom pipes as part of active
* display count
@ -66,13 +67,7 @@ int clk_mgr_helper_get_active_display_cnt(
if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM)
continue;
/*
* Only notify active stream or virtual stream.
* Need to notify virtual stream to work around
* headless case. HPD does not fire when system is in
* S0i2.
*/
if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
if (!stream->dpms_off || (stream_status && stream_status->plane_count))
display_count++;
}

View File

@ -1222,6 +1222,12 @@ void dcn35_clk_mgr_construct(
ctx->dc->debug.disable_dpp_power_gate = false;
ctx->dc->debug.disable_hubp_power_gate = false;
ctx->dc->debug.disable_dsc_power_gate = false;
/* Disable dynamic IPS2 in older PMFW (93.12) for Z8 interop. */
if (ctx->dc->config.disable_ips == DMUB_IPS_ENABLE &&
ctx->dce_version == DCN_VERSION_3_5 &&
((clk_mgr->base.smu_ver & 0x00FFFFFF) <= 0x005d0c00))
ctx->dc->config.disable_ips = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF;
} else {
/*let's reset the config control flag*/
ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/

View File

@ -1767,7 +1767,7 @@ bool dc_validate_boot_timing(const struct dc *dc,
if (crtc_timing->pix_clk_100hz != pix_clk_100hz)
return false;
if (!se->funcs->dp_get_pixel_format)
if (!se || !se->funcs->dp_get_pixel_format)
return false;
if (!se->funcs->dp_get_pixel_format(
@ -2376,7 +2376,7 @@ static bool is_surface_in_context(
return false;
}
static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
static enum surface_update_type get_plane_info_update_type(const struct dc *dc, const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
enum surface_update_type update_type = UPDATE_TYPE_FAST;
@ -2455,7 +2455,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa
/* todo: below are HW dependent, we should add a hook to
* DCE/N resource and validated there.
*/
if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
if (!dc->debug.skip_full_updated_if_possible) {
/* swizzled mode requires RQ to be setup properly,
* thus need to run DML to calculate RQ settings
*/
@ -2547,7 +2547,7 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
update_flags->raw = 0; // Reset all flags
type = get_plane_info_update_type(u);
type = get_plane_info_update_type(dc, u);
elevate_update_type(&overall_type, type);
type = get_scaling_info_update_type(dc, u);
@ -2596,6 +2596,12 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
elevate_update_type(&overall_type, UPDATE_TYPE_MED);
}
if (u->sdr_white_level_nits)
if (u->sdr_white_level_nits != u->surface->sdr_white_level_nits) {
update_flags->bits.sdr_white_level_nits = 1;
elevate_update_type(&overall_type, UPDATE_TYPE_FULL);
}
if (u->cm2_params) {
if ((u->cm2_params->component_settings.shaper_3dlut_setting
!= u->surface->mcm_shaper_3dlut_setting)
@ -2876,6 +2882,10 @@ static void copy_surface_update_to_plane(
surface->hdr_mult =
srf_update->hdr_mult;
if (srf_update->sdr_white_level_nits)
surface->sdr_white_level_nits =
srf_update->sdr_white_level_nits;
if (srf_update->blend_tf)
memcpy(&surface->blend_tf, srf_update->blend_tf,
sizeof(surface->blend_tf));
@ -4679,6 +4689,8 @@ static bool full_update_required(struct dc *dc,
srf_updates[i].scaling_info ||
(srf_updates[i].hdr_mult.value &&
srf_updates[i].hdr_mult.value != srf_updates->surface->hdr_mult.value) ||
(srf_updates[i].sdr_white_level_nits &&
srf_updates[i].sdr_white_level_nits != srf_updates->surface->sdr_white_level_nits) ||
srf_updates[i].in_transfer_func ||
srf_updates[i].func_shaper ||
srf_updates[i].lut3d_func ||
@ -5743,6 +5755,27 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
return DC_OK;
}
/**
* dc_process_dmub_dpia_set_tps_notification - Submits tps notification
*
* @dc: [in] dc structure
* @link_index: [in] link index
* @tps: [in] request tps
*
* Submits set_tps_notification command to dmub via inbox message
*/
void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps)
{
union dmub_rb_cmd cmd = {0};
cmd.set_tps_notification.header.type = DMUB_CMD__DPIA;
cmd.set_tps_notification.header.sub_type = DMUB_CMD__DPIA_SET_TPS_NOTIFICATION;
cmd.set_tps_notification.tps_notification.instance = dc->links[link_index]->ddc_hw_inst;
cmd.set_tps_notification.tps_notification.tps = tps;
dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
}
/**
* dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption
*

View File

@ -55,7 +55,7 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
#define DC_VER "3.2.299"
#define DC_VER "3.2.301"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@ -462,6 +462,7 @@ struct dc_config {
bool support_edp0_on_dp1;
unsigned int enable_fpo_flicker_detection;
bool disable_hbr_audio_dp2;
bool consolidated_dpia_dp_lt;
};
enum visual_confirm {
@ -762,7 +763,8 @@ union dpia_debug_options {
uint32_t disable_mst_dsc_work_around:1; /* bit 3 */
uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
uint32_t disable_usb4_pm_support:1; /* bit 5 */
uint32_t reserved:26;
uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */
uint32_t reserved:25;
} bits;
uint32_t raw;
};
@ -1056,6 +1058,9 @@ struct dc_debug_options {
unsigned int force_lls;
bool notify_dpia_hr_bw;
bool enable_ips_visual_confirm;
unsigned int sharpen_policy;
unsigned int scale_to_sharpness_policy;
bool skip_full_updated_if_possible;
};
@ -1269,6 +1274,7 @@ union surface_update_flags {
uint32_t tmz_changed:1;
uint32_t mcm_transfer_function_enable_change:1; /* disable or enable MCM transfer func */
uint32_t full_update:1;
uint32_t sdr_white_level_nits:1;
} bits;
uint32_t raw;
@ -1351,6 +1357,7 @@ struct dc_plane_state {
bool adaptive_sharpness_en;
int sharpness_level;
enum linear_light_scaling linear_light_scaling;
unsigned int sdr_white_level_nits;
};
struct dc_plane_info {
@ -1508,6 +1515,7 @@ struct dc_surface_update {
*/
struct dc_cm2_parameters *cm2_params;
const struct dc_csc_transform *cursor_csc_color_matrix;
unsigned int sdr_white_level_nits;
};
/*
@ -2520,6 +2528,8 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
uint8_t mst_alloc_slots,
uint8_t *mst_slots_in_use);
void dc_process_dmub_dpia_set_tps_notification(const struct dc *dc, uint32_t link_index, uint8_t tps);
void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
uint32_t hpd_int_enable);

View File

@ -969,6 +969,14 @@ union dp_sink_video_fallback_formats {
uint8_t raw;
};
union dpcd_max_uncompressed_pixel_rate_cap {
struct {
uint16_t max_uncompressed_pixel_rate_cap :15;
uint16_t valid :1;
} bits;
uint8_t raw[2];
};
union dp_fec_capability1 {
struct {
uint8_t AGGREGATED_ERROR_COUNTERS_CAPABLE :1;
@ -1170,6 +1178,7 @@ struct dpcd_caps {
struct dc_lttpr_caps lttpr_caps;
struct adaptive_sync_caps adaptive_sync_caps;
struct dpcd_usb4_dp_tunneling_info usb4_dp_tun_info;
union dpcd_max_uncompressed_pixel_rate_cap max_uncompressed_pixel_rate_cap;
union dp_128b_132b_supported_link_rates dp_128b_132b_supported_link_rates;
union dp_main_line_channel_coding_cap channel_coding_cap;
@ -1340,6 +1349,9 @@ struct dp_trace {
#ifndef DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX
#define DP_CABLE_ATTRIBUTES_UPDATED_BY_DPTX 0x110
#endif
#ifndef DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP
#define DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP 0x221c
#endif
#ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE
#define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50
#endif

View File

@ -59,6 +59,7 @@ struct dc_dsc_config_options {
uint32_t max_target_bpp_limit_override_x16;
uint32_t slice_height_granularity;
uint32_t dsc_force_odm_hslice_override;
bool force_dsc_when_not_needed;
};
bool dc_dsc_parse_dsc_dpcd(const struct dc *dc,
@ -100,7 +101,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
*/
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
struct dc_dsc_policy *policy);
struct dc_dsc_policy *policy,
const enum dc_link_encoding_format link_encoding);
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit);

View File

@ -186,19 +186,17 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
spl_in->h_active = pipe_ctx->plane_res.scl_data.h_active;
spl_in->v_active = pipe_ctx->plane_res.scl_data.v_active;
spl_in->debug.sharpen_policy = (enum sharpen_policy)pipe_ctx->stream->ctx->dc->debug.sharpen_policy;
spl_in->debug.scale_to_sharpness_policy =
(enum scale_to_sharpness_policy)pipe_ctx->stream->ctx->dc->debug.scale_to_sharpness_policy;
/* Check if it is stream is in fullscreen and if its HDR.
* Use this to determine sharpness levels
*/
spl_in->is_fullscreen = dm_helpers_is_fullscreen(pipe_ctx->stream->ctx, pipe_ctx->stream);
spl_in->is_hdr_on = dm_helpers_is_hdr_on(pipe_ctx->stream->ctx, pipe_ctx->stream);
spl_in->hdr_multx100 = 0;
if (spl_in->is_hdr_on) {
spl_in->hdr_multx100 = (uint32_t)dc_fixpt_floor(dc_fixpt_mul(plane_state->hdr_mult,
dc_fixpt_from_int(100)));
/* Disable sharpness for HDR Mult > 6.0 */
if (spl_in->hdr_multx100 > 600)
spl_in->adaptive_sharpness.enable = false;
}
spl_in->sdr_white_level_nits = plane_state->sdr_white_level_nits;
}
/// @brief Translate SPL output parameters to pipe context

View File

@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
if (req128_c && log2_swath_height_c > 0)
log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;

View File

@ -313,9 +313,6 @@ static void handle_det_buf_split(struct display_mode_lib *mode_lib,
if (swath_height_c > 0)
log2_swath_height_c = dml_log2(swath_height_c);
if (req128_c && log2_swath_height_c > 0)
log2_swath_height_c -= 1;
}
rq_param->dlg.rq_l.swath_height = 1 << log2_swath_height_l;

View File

@ -1924,15 +1924,6 @@ static unsigned int CalculateVMAndRowBytes(
*PixelPTEReqWidth = 32768.0 / BytePerPixel;
*PTERequestSize = 64;
FractionOfPTEReturnDrop = 0;
} else if (MacroTileSizeBytes == 4096) {
PixelPTEReqHeightPTEs = 1;
*PixelPTEReqHeight = MacroTileHeight;
*PixelPTEReqWidth = 8 * *MacroTileWidth;
*PTERequestSize = 64;
if (ScanDirection != dm_vert)
FractionOfPTEReturnDrop = 0;
else
FractionOfPTEReturnDrop = 7.0 / 8;
} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
PixelPTEReqHeightPTEs = 16;
*PixelPTEReqHeight = 16 * BlockHeight256Bytes;

View File

@ -8926,7 +8926,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
// The prefetch scheduling should only be calculated once as per AllowForPStateChangeOrStutterInVBlank requirement
// If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving feature
// if possible, then will try to program for the best power saving features in order of diffculty (dram, fclk, stutter)
// if possible, then will try to program for the best power saving features in order of difficulty (dram, fclk, stutter)
s->iteration = 0;
s->MaxTotalRDBandwidth = 0;
s->AllPrefetchModeTested = false;
@ -9977,7 +9977,7 @@ void dml_core_get_row_heights(
dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
#endif
// just suppluy with enough parameters to calculate meta and dte
// just supply with enough parameters to calculate meta and dte
CalculateVMAndRowBytes(
0, // dml_bool_t ViewportStationary,
1, // dml_bool_t DCCEnable,
@ -10110,7 +10110,7 @@ dml_bool_t dml_mode_support(
/// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK
/// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values.
/// @param state_idx Power state idx chosen
/// @param display_cfg Display Congiuration
/// @param display_cfg Display Configuration
/// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
/// TODO: Add clk_cfg input, could be useful for standalone mode
dml_bool_t dml_mode_programming(

View File

@ -858,7 +858,9 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
plane->immediate_flip = plane_state->flip_immediate;
plane->composition.rect_out_height_spans_vactive = plane_state->dst_rect.height >= stream->timing.v_addressable;
plane->composition.rect_out_height_spans_vactive =
plane_state->dst_rect.height >= stream->timing.v_addressable &&
stream->dst.height >= stream->timing.v_addressable;
}
//TODO : Could be possibly moved to a common helper layer.

View File

@ -940,9 +940,11 @@ static void build_synchronized_timing_groups(
/* find synchronizable timing groups */
for (j = i + 1; j < display_config->display_config.num_streams; j++) {
if (memcmp(master_timing,
&display_config->display_config.stream_descriptors[j].timing,
sizeof(struct dml2_timing_cfg)) == 0 &&
display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder) {
&display_config->display_config.stream_descriptors[j].timing,
sizeof(struct dml2_timing_cfg)) == 0 &&
display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder &&
(display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match
display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) {
set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j);
set_bit_in_bitfield(&stream_mapped_mask, j);
}

View File

@ -668,6 +668,7 @@ static bool decide_dsc_bandwidth_range(
*/
static bool decide_dsc_target_bpp_x16(
const struct dc_dsc_policy *policy,
const struct dc_dsc_config_options *options,
const struct dsc_enc_caps *dsc_common_caps,
const int target_bandwidth_kbps,
const struct dc_crtc_timing *timing,
@ -682,7 +683,7 @@ static bool decide_dsc_target_bpp_x16(
if (decide_dsc_bandwidth_range(policy->min_target_bpp * 16, policy->max_target_bpp * 16,
num_slices_h, dsc_common_caps, timing, link_encoding, &range)) {
if (target_bandwidth_kbps >= range.stream_kbps) {
if (policy->enable_dsc_when_not_needed)
if (policy->enable_dsc_when_not_needed || options->force_dsc_when_not_needed)
/* enable max bpp even dsc is not needed */
*target_bpp_x16 = range.max_target_bpp_x16;
} else if (target_bandwidth_kbps >= range.max_kbps) {
@ -882,7 +883,7 @@ static bool setup_dsc_config(
memset(dsc_cfg, 0, sizeof(struct dc_dsc_config));
dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy);
dc_dsc_get_policy_for_timing(timing, options->max_target_bpp_limit_override_x16, &policy, link_encoding);
pic_width = timing->h_addressable + timing->h_border_left + timing->h_border_right;
pic_height = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
@ -1080,6 +1081,7 @@ static bool setup_dsc_config(
if (target_bandwidth_kbps > 0) {
is_dsc_possible = decide_dsc_target_bpp_x16(
&policy,
options,
&dsc_common_caps,
target_bandwidth_kbps,
timing,
@ -1171,7 +1173,8 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
uint32_t max_target_bpp_limit_override_x16,
struct dc_dsc_policy *policy)
struct dc_dsc_policy *policy,
const enum dc_link_encoding_format link_encoding)
{
uint32_t bpc = 0;
@ -1235,10 +1238,7 @@ void dc_dsc_get_policy_for_timing(const struct dc_crtc_timing *timing,
policy->max_target_bpp = max_target_bpp_limit_override_x16 / 16;
/* enable DSC when not needed, default false */
if (dsc_policy_enable_dsc_when_not_needed)
policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
else
policy->enable_dsc_when_not_needed = false;
policy->enable_dsc_when_not_needed = dsc_policy_enable_dsc_when_not_needed;
}
void dc_dsc_policy_set_max_target_bpp_limit(uint32_t limit)
@ -1267,4 +1267,5 @@ void dc_dsc_get_default_config_option(const struct dc *dc, struct dc_dsc_config_
options->dsc_force_odm_hslice_override = dc->debug.force_odm_combine;
options->max_target_bpp_limit_override_x16 = 0;
options->slice_height_granularity = 1;
options->force_dsc_when_not_needed = false;
}

View File

@ -545,6 +545,7 @@ static void hubbub35_init(struct hubbub *hubbub)
DCHUBBUB_ARB_MAX_REQ_OUTSTAND, 256,
DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 256);
memset(&hubbub2->watermarks.a.cstate_pstate, 0, sizeof(hubbub2->watermarks.a.cstate_pstate));
}
/*static void hubbub35_set_request_limit(struct hubbub *hubbub,

View File

@ -57,6 +57,7 @@
#include "panel_cntl.h"
#include "dc_state_priv.h"
#include "dpcd_defs.h"
#include "dsc.h"
/* include DCE11 register header files */
#include "dce/dce_11_0_d.h"
#include "dce/dce_11_0_sh_mask.h"
@ -1823,6 +1824,48 @@ static void get_edp_links_with_sink(
}
}
static void clean_up_dsc_blocks(struct dc *dc)
{
struct display_stream_compressor *dsc = NULL;
struct timing_generator *tg = NULL;
struct stream_encoder *se = NULL;
struct dccg *dccg = dc->res_pool->dccg;
struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl;
int i;
if (dc->ctx->dce_version != DCN_VERSION_3_5 &&
dc->ctx->dce_version != DCN_VERSION_3_51)
return;
for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
struct dcn_dsc_state s = {0};
dsc = dc->res_pool->dscs[i];
dsc->funcs->dsc_read_state(dsc, &s);
if (s.dsc_fw_en) {
/* disable DSC in OPTC */
if (i < dc->res_pool->timing_generator_count) {
tg = dc->res_pool->timing_generators[i];
tg->funcs->set_dsc_config(tg, OPTC_DSC_DISABLED, 0, 0);
}
/* disable DSC in stream encoder */
if (i < dc->res_pool->stream_enc_count) {
se = dc->res_pool->stream_enc[i];
se->funcs->dp_set_dsc_config(se, OPTC_DSC_DISABLED, 0, 0);
se->funcs->dp_set_dsc_pps_info_packet(se, false, NULL, true);
}
/* disable DSC block */
if (dccg->funcs->set_ref_dscclk)
dccg->funcs->set_ref_dscclk(dccg, dsc->inst);
dsc->funcs->dsc_disable(dsc);
/* power down DSC */
if (pg_cntl != NULL)
pg_cntl->funcs->dsc_pg_control(pg_cntl, dsc->inst, false);
}
}
}
/*
* When ASIC goes from VBIOS/VGA mode to driver/accelerated mode we need:
* 1. Power down all DC HW blocks
@ -1927,6 +1970,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
power_down_all_hw_blocks(dc);
/* DSC could be enabled on eDP during VBIOS post.
* To clean up dsc blocks if eDP is in link but not active.
*/
if (edp_link_with_sink && (edp_stream_num == 0))
clean_up_dsc_blocks(dc);
disable_vga_and_power_gate_all_controllers(dc);
if (edp_link_with_sink && !keep_edp_vdd_on)
dc->hwss.edp_power_control(edp_link_with_sink, false);
@ -2046,13 +2096,20 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
pipe_ctx[i]->stream_res.tg->funcs->set_drr(
pipe_ctx[i]->stream_res.tg, &params);
/* dc_state_destruct() might null the stream resources, so fetch tg
* here first to avoid a race condition. The lifetime of the pointee
* itself (the timing_generator object) is not a problem here.
*/
struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
pipe_ctx[i]->stream_res.tg,
event_triggers, num_frames);
if ((tg != NULL) && tg->funcs) {
if (tg->funcs->set_drr)
tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
if (tg->funcs->set_static_screen_control)
tg->funcs->set_static_screen_control(
tg, event_triggers, num_frames);
}
}
}

View File

@ -455,7 +455,7 @@ bool dcn30_mmhubbub_warmup(
struct mcif_wb *mcif_wb;
struct mcif_warmup_params warmup_params = {0};
unsigned int i, i_buf;
/*make sure there is no active DWB eanbled */
/* make sure there is no active DWB enabled */
for (i = 0; i < num_dwb; i++) {
dwb = dc->res_pool->dwbc[wb_info[i].dwb_pipe_inst];
if (dwb->dwb_is_efc_transition || dwb->dwb_is_drc) {

View File

@ -1032,6 +1032,20 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
struct dcn_dsc_state dsc_state = {0};
if (!dsc) {
DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
return;
}
if (dsc->funcs->dsc_read_state) {
dsc->funcs->dsc_read_state(dsc, &dsc_state);
if (!dsc_state.dsc_fw_en) {
DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
return;
}
}
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;

View File

@ -334,7 +334,20 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
struct dsc_config dsc_cfg;
struct dsc_optc_config dsc_optc_cfg = {0};
enum optc_dsc_mode optc_dsc_mode;
struct dcn_dsc_state dsc_state = {0};
if (!dsc) {
DC_LOG_DSC("DSC is NULL for tg instance %d:", pipe_ctx->stream_res.tg->inst);
return;
}
if (dsc->funcs->dsc_read_state) {
dsc->funcs->dsc_read_state(dsc, &dsc_state);
if (!dsc_state.dsc_fw_en) {
DC_LOG_DSC("DSC has been disabled for tg instance %d:", pipe_ctx->stream_res.tg->inst);
return;
}
}
/* Enable DSC hw block */
dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;

View File

@ -50,8 +50,31 @@ static void update_dpia_stream_allocation_table(struct dc_link *link,
DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n",
status, mst_alloc_slots, prev_mst_slots_in_use);
ASSERT(link_enc);
link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
if (link_enc)
link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
}
static void set_dio_dpia_link_test_pattern(struct dc_link *link,
const struct link_resource *link_res,
struct encoder_set_dp_phy_pattern_param *tp_params)
{
if (tp_params->dp_phy_pattern != DP_TEST_PATTERN_VIDEO_MODE)
return;
struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
if (!link_enc)
return;
link_enc->funcs->dp_set_phy_pattern(link_enc, tp_params);
link->dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_SET_SOURCE_PATTERN);
}
static void set_dio_dpia_lane_settings(struct dc_link *link,
const struct link_resource *link_res,
const struct dc_link_settings *link_settings,
const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX])
{
}
static const struct link_hwss dpia_link_hwss = {
@ -65,8 +88,8 @@ static const struct link_hwss dpia_link_hwss = {
.ext = {
.set_throttled_vcp_size = set_dio_throttled_vcp_size,
.enable_dp_link_output = enable_dio_dp_link_output,
.set_dp_link_test_pattern = set_dio_dp_link_test_pattern,
.set_dp_lane_settings = set_dio_dp_lane_settings,
.set_dp_link_test_pattern = set_dio_dpia_link_test_pattern,
.set_dp_lane_settings = set_dio_dpia_lane_settings,
.update_stream_allocation_table = update_dpia_stream_allocation_table,
},
};

View File

@ -287,6 +287,13 @@ static bool dp_validate_mode_timing(
req_bw = dc_bandwidth_in_kbps_from_timing(timing, dc_link_get_highest_encoding_format(link));
max_bw = dp_link_bandwidth_kbps(link, link_setting);
bool is_max_uncompressed_pixel_rate_exceeded = link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.valid &&
timing->pix_clk_100hz > link->dpcd_caps.max_uncompressed_pixel_rate_cap.bits.max_uncompressed_pixel_rate_cap * 10000;
if (is_max_uncompressed_pixel_rate_exceeded && !timing->flags.DSC) {
return false;
}
if (req_bw <= max_bw) {
/* remember the biggest mode here, during
* initial link training (to get

View File

@ -1942,6 +1942,11 @@ static bool retrieve_link_cap(struct dc_link *link)
DC_LOG_DP2("\tFEC aggregated error counters are supported");
}
core_link_read_dpcd(link,
DPCD_MAX_UNCOMPRESSED_PIXEL_RATE_CAP,
link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw,
sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw));
retrieve_cable_id(link);
dpcd_write_cable_id_to_dprx(link);

View File

@ -515,6 +515,41 @@ bool dp_is_interlane_aligned(union lane_align_status_updated align_status)
return align_status.bits.INTERLANE_ALIGN_DONE == 1;
}
bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
struct dc_link *link,
uint8_t retries)
{
/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
* has to share encoders unlike DP and USBC
*/
return (dp_is_interlane_aligned(align_status) ||
(link->skip_fallback_on_link_loss && retries));
}
uint32_t dp_get_eq_aux_rd_interval(
const struct dc_link *link,
const struct link_training_settings *lt_settings,
uint32_t offset,
uint8_t retries)
{
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
if (offset == 0 && retries == 1 && lt_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT)
return max(lt_settings->eq_pattern_time, (uint32_t) DPIA_CLK_SYNC_DELAY);
else
return dpia_get_eq_aux_rd_interval(link, lt_settings, offset);
} else if (is_repeater(lt_settings, offset))
return dp_translate_training_aux_read_interval(
link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
else
return lt_settings->eq_pattern_time;
}
bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
enum dc_status status)
{
return (status != DC_OK && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA);
}
enum link_training_result dp_check_link_loss_status(
struct dc_link *link,
const struct link_training_settings *link_training_setting)
@ -973,13 +1008,17 @@ void repeater_training_done(struct dc_link *link, uint32_t offset)
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
}
static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
static enum link_training_result dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
{
enum dc_status status;
uint8_t sink_status = 0;
uint8_t i;
/* clear training pattern set */
dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
status = dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
if (dp_check_dpcd_reqeust_status(link, status))
return LINK_TRAINING_ABORT;
if (encoding == DP_128b_132b_ENCODING) {
/* poll for intra-hop disable */
@ -990,6 +1029,8 @@ static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding
fsleep(1000);
}
}
return LINK_TRAINING_SUCCESS;
}
enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
@ -1013,17 +1054,18 @@ enum dc_status dpcd_configure_channel_coding(struct dc_link *link,
return status;
}
void dpcd_set_training_pattern(
enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern)
{
enum dc_status status;
union dpcd_training_pattern dpcd_pattern = {0};
dpcd_pattern.v1_4.TRAINING_PATTERN_SET =
dp_training_pattern_to_dpcd_training_pattern(
link, training_pattern);
core_link_write_dpcd(
status = core_link_write_dpcd(
link,
DP_TRAINING_PATTERN_SET,
&dpcd_pattern.raw,
@ -1033,6 +1075,8 @@ void dpcd_set_training_pattern(
__func__,
DP_TRAINING_PATTERN_SET,
dpcd_pattern.v1_4.TRAINING_PATTERN_SET);
return status;
}
enum dc_status dpcd_set_link_settings(
@ -1185,6 +1229,13 @@ void dpcd_set_lt_pattern_and_lane_settings(
dpcd_lt_buffer[DP_TRAINING_PATTERN_SET - DP_TRAINING_PATTERN_SET]
= dpcd_pattern.raw;
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
dpia_set_tps_notification(
link,
lt_settings,
dpcd_pattern.v1_4.TRAINING_PATTERN_SET,
offset);
if (is_repeater(lt_settings, offset)) {
DC_LOG_HW_LINK_TRAINING("%s\n LTTPR Repeater ID: %d\n 0x%X pattern = %x\n",
__func__,
@ -1455,7 +1506,8 @@ static enum link_training_result dp_transition_to_video_idle(
*/
if (link->connector_signal != SIGNAL_TYPE_EDP && status == LINK_TRAINING_SUCCESS) {
msleep(5);
status = dp_check_link_loss_status(link, lt_settings);
if (!link->skip_fallback_on_link_loss)
status = dp_check_link_loss_status(link, lt_settings);
}
return status;
}
@ -1521,7 +1573,9 @@ enum link_training_result dp_perform_link_training(
ASSERT(0);
/* exit training mode */
dpcd_exit_training_mode(link, encoding);
if ((dpcd_exit_training_mode(link, encoding) != LINK_TRAINING_SUCCESS || status == LINK_TRAINING_ABORT) &&
link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
dpia_training_abort(link, &lt_settings, 0);
/* switch to video idle */
if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern)
@ -1599,8 +1653,7 @@ bool perform_link_training_with_retries(
dp_perform_link_training_skip_aux(link, &pipe_ctx->link_res, &cur_link_settings);
return true;
} else {
/** @todo Consolidate USB4 DP and DPx.x training. */
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
if (!link->dc->config.consolidated_dpia_dp_lt && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
status = dpia_perform_link_training(
link,
&pipe_ctx->link_res,
@ -1629,8 +1682,17 @@ bool perform_link_training_with_retries(
dp_trace_lt_total_count_increment(link, false);
dp_trace_lt_result_update(link, status, false);
dp_trace_set_lt_end_timestamp(link, false);
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low)
if (status == LINK_TRAINING_SUCCESS && !is_link_bw_low) {
// Update verified link settings to current one
// Because DPIA LT might fallback to lower link setting.
if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
link->verified_link_cap.link_rate = link->cur_link_settings.link_rate;
link->verified_link_cap.lane_count = link->cur_link_settings.lane_count;
dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link);
}
return true;
}
}
fail_count++;

View File

@ -55,7 +55,7 @@ void dp_set_hw_test_pattern(
uint8_t *custom_pattern,
uint32_t custom_pattern_size);
void dpcd_set_training_pattern(
enum dc_status dpcd_set_training_pattern(
struct dc_link *link,
enum dc_dp_training_pattern training_pattern);
@ -182,4 +182,18 @@ uint32_t dp_translate_training_aux_read_interval(
uint8_t dp_get_nibble_at_index(const uint8_t *buf,
uint32_t index);
bool dp_check_interlane_aligned(union lane_align_status_updated align_status,
struct dc_link *link,
uint8_t retries);
uint32_t dp_get_eq_aux_rd_interval(
const struct dc_link *link,
const struct link_training_settings *lt_settings,
uint32_t offset,
uint8_t retries);
bool dp_check_dpcd_reqeust_status(const struct dc_link *link,
enum dc_status status);
#endif /* __DC_LINK_DP_TRAINING_H__ */

View File

@ -157,6 +157,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
struct link_training_settings *lt_settings,
uint32_t offset)
{
enum dc_status status;
uint32_t retries_cr;
uint32_t retry_count;
uint32_t wait_time_microsec;
@ -216,7 +217,7 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
/* 4. Read lane status and requested drive
* settings as set by the sink
*/
dp_get_lane_status_and_lane_adjust(
status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@ -224,6 +225,9 @@ enum link_training_result perform_8b_10b_clock_recovery_sequence(
dpcd_lane_adjust,
offset);
if (dp_check_dpcd_reqeust_status(link, status))
return LINK_TRAINING_ABORT;
/* 5. check CR done*/
if (dp_is_cr_done(lane_count, dpcd_lane_status)) {
DC_LOG_HW_LINK_TRAINING("%s: Clock recovery OK\n", __func__);
@ -273,6 +277,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
struct link_training_settings *lt_settings,
uint32_t offset)
{
enum dc_status status;
enum dc_dp_training_pattern tr_pattern;
uint32_t retries_ch_eq;
uint32_t wait_time_microsec;
@ -308,12 +313,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_set_lane_settings(link, lt_settings, offset);
/* 3. wait for receiver to lock-on*/
wait_time_microsec = lt_settings->eq_pattern_time;
if (is_repeater(lt_settings, offset))
wait_time_microsec =
dp_translate_training_aux_read_interval(
link->dpcd_caps.lttpr_caps.aux_rd_interval[offset - 1]);
wait_time_microsec = dp_get_eq_aux_rd_interval(link, lt_settings, offset, retries_ch_eq);
dp_wait_for_training_aux_rd_interval(
link,
@ -322,7 +322,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 4. Read lane status and requested
* drive settings as set by the sink*/
dp_get_lane_status_and_lane_adjust(
status = dp_get_lane_status_and_lane_adjust(
link,
lt_settings,
dpcd_lane_status,
@ -330,6 +330,9 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
dpcd_lane_adjust,
offset);
if (dp_check_dpcd_reqeust_status(link, status))
return LINK_TRAINING_ABORT;
/* 5. check CR done*/
if (!dp_is_cr_done(lane_count, dpcd_lane_status))
return dpcd_lane_status[0].bits.CR_DONE_0 ?
@ -339,7 +342,7 @@ enum link_training_result perform_8b_10b_channel_equalization_sequence(
/* 6. check CHEQ done*/
if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
dp_is_symbol_locked(lane_count, dpcd_lane_status) &&
dp_is_interlane_aligned(dpcd_lane_status_updated))
dp_check_interlane_aligned(dpcd_lane_status_updated, link, retries_ch_eq))
return LINK_TRAINING_SUCCESS;
/* 7. update VS/PE/PC2 in lt_settings*/

View File

@ -43,9 +43,6 @@
#define DC_LOGGER \
link->ctx->logger
/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
#define DPIA_CLK_SYNC_DELAY 16000
/* Extend interval between training status checks for manual testing. */
#define DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US 60000000
@ -566,28 +563,6 @@ static enum link_training_result dpia_training_cr_phase(
return result;
}
/* Return status read interval during equalization phase. */
static uint32_t dpia_get_eq_aux_rd_interval(
const struct dc_link *link,
const struct link_training_settings *lt_settings,
uint32_t hop)
{
uint32_t wait_time_microsec;
if (hop == DPRX)
wait_time_microsec = lt_settings->eq_pattern_time;
else
wait_time_microsec =
dp_translate_training_aux_read_interval(
link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
/* Check debug option for extending aux read interval. */
if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
wait_time_microsec = DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
return wait_time_microsec;
}
/* Execute equalization phase of link training for specified hop in display
* path in non-transparent mode:
* - driver issues both DPCD and SET_CONFIG transactions.
@ -936,6 +911,22 @@ static enum link_training_result dpia_training_end(
return result;
}
/* Return status read interval during equalization phase. */
uint32_t dpia_get_eq_aux_rd_interval(
const struct dc_link *link,
const struct link_training_settings *lt_settings,
uint32_t hop)
{
/* Check debug option for extending aux read interval. */
if (link->dc->debug.dpia_debug.bits.extend_aux_rd_interval)
return DPIA_DEBUG_EXTENDED_AUX_RD_INTERVAL_US;
else if (hop == DPRX)
return lt_settings->eq_pattern_time;
else
return dp_translate_training_aux_read_interval(
link->dpcd_caps.lttpr_caps.aux_rd_interval[hop - 1]);
}
/* When aborting training of specified hop in display path, clean up by:
* - Attempting to clear DPCD TRAINING_PATTERN_SET, LINK_BW_SET and LANE_COUNT_SET.
* - Sending SET_CONFIG(SET_LINK) with lane count and link rate set to 0.
@ -943,7 +934,7 @@ static enum link_training_result dpia_training_end(
* @param link DPIA link being trained.
* @param hop Hop in display path. DPRX = 0.
*/
static void dpia_training_abort(
void dpia_training_abort(
struct dc_link *link,
struct link_training_settings *lt_settings,
uint32_t hop)
@ -968,7 +959,26 @@ static void dpia_training_abort(
core_link_write_dpcd(link, dpcd_tps_offset, &data, 1);
core_link_write_dpcd(link, DP_LINK_BW_SET, &data, 1);
core_link_write_dpcd(link, DP_LANE_COUNT_SET, &data, 1);
core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
if (!link->dc->config.consolidated_dpia_dp_lt)
core_link_send_set_config(link, DPIA_SET_CFG_SET_LINK, data);
}
void dpia_set_tps_notification(
struct dc_link *link,
const struct link_training_settings *lt_settings,
uint8_t pattern,
uint32_t hop)
{
uint8_t repeater_cnt = 0; /* Number of hops/repeaters in display path. */
if (lt_settings->lttpr_mode != LTTPR_MODE_NON_TRANSPARENT || pattern == DPCD_TRAINING_PATTERN_VIDEOIDLE)
return;
repeater_cnt = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt);
if (hop != repeater_cnt)
dc_process_dmub_dpia_set_tps_notification(link->ctx->dc, link->link_index, pattern);
}
enum link_training_result dpia_perform_link_training(

View File

@ -28,6 +28,9 @@
#define __DC_LINK_DP_TRAINING_DPIA_H__
#include "link_dp_training.h"
/* The approximate time (us) it takes to transmit 9 USB4 DP clock sync packets. */
#define DPIA_CLK_SYNC_DELAY 16000
/* Train DP tunneling link for USB4 DPIA display endpoint.
* DPIA equivalent of dc_link_dp_perfrorm_link_training.
* Aborts link training upon detection of sink unplug.
@ -38,4 +41,20 @@ enum link_training_result dpia_perform_link_training(
const struct dc_link_settings *link_setting,
bool skip_video_pattern);
void dpia_training_abort(
struct dc_link *link,
struct link_training_settings *lt_settings,
uint32_t hop);
uint32_t dpia_get_eq_aux_rd_interval(
const struct dc_link *link,
const struct link_training_settings *lt_settings,
uint32_t hop);
void dpia_set_tps_notification(
struct dc_link *link,
const struct link_training_settings *lt_settings,
uint8_t pattern,
uint32_t offset);
#endif /* __DC_LINK_DP_TRAINING_DPIA_H__ */

View File

@ -2155,6 +2155,7 @@ static bool dcn35_resource_construct(
dc->dml2_options.max_segments_per_hubp = 24;
dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
dc->dml2_options.override_det_buffer_size_kbytes = true;
if (dc->config.sdpif_request_limit_words_per_umc == 0)
dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/

View File

@ -736,7 +736,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.hdmichar = true,
.dpstream = true,
.symclk32_se = true,
.symclk32_le = true,
.symclk32_le = false,
.symclk_fe = true,
.physymclk = false,
.dpiasymclk = true,
@ -2133,6 +2133,7 @@ static bool dcn351_resource_construct(
dc->dml2_options.max_segments_per_hubp = 24;
dc->dml2_options.det_segment_size = DCN3_2_DET_SEG_SIZE;/*todo*/
dc->dml2_options.override_det_buffer_size_kbytes = true;
if (dc->config.sdpif_request_limit_words_per_umc == 0)
dc->config.sdpif_request_limit_words_per_umc = 16;/*todo*/

View File

@ -813,6 +813,14 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
return skip_easf;
}
/* Check if video is in fullscreen mode */
static bool spl_is_video_fullscreen(struct spl_in *spl_in)
{
if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen)
return true;
return false;
}
static bool spl_get_isharp_en(struct spl_in *spl_in,
struct spl_scratch *spl_scratch)
{
@ -820,6 +828,7 @@ static bool spl_get_isharp_en(struct spl_in *spl_in,
int vratio = 0;
int hratio = 0;
struct spl_taps taps = spl_scratch->scl_data.taps;
bool fullscreen = spl_is_video_fullscreen(spl_in);
/* Return if adaptive sharpness is disabled */
if (spl_in->adaptive_sharpness.enable == false)
@ -835,9 +844,18 @@ static bool spl_get_isharp_en(struct spl_in *spl_in,
// Scaling is up to 1:1 (no scaling) or upscaling
/*
* Apply sharpness to all RGB surfaces and to
* NV12/P010 surfaces
* Apply sharpness to RGB and YUV (NV12/P010)
* surfaces based on policy setting
*/
if (!spl_is_yuv420(spl_in->basic_in.format) &&
(spl_in->debug.sharpen_policy == SHARPEN_YUV))
return enable_isharp;
else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) &&
(spl_in->debug.sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV))
return enable_isharp;
else if (!spl_in->is_fullscreen &&
spl_in->debug.sharpen_policy == SHARPEN_FULLSCREEN_ALL)
return enable_isharp;
/*
* Apply sharpness if supports horizontal taps 4,6 AND
@ -1155,14 +1173,19 @@ static void spl_set_dscl_prog_data(struct spl_in *spl_in, struct spl_scratch *sp
}
/* Calculate C0-C3 coefficients based on HDR_mult */
static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t hdr_multx100)
static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint32_t sdr_white_level_nits)
{
struct spl_fixed31_32 hdr_mult, c0_mult, c1_mult, c2_mult;
struct spl_fixed31_32 c0_calc, c1_calc, c2_calc;
struct spl_custom_float_format fmt;
uint32_t hdr_multx100_int;
SPL_ASSERT(hdr_multx100);
hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100, 100LL);
if ((sdr_white_level_nits >= 80) && (sdr_white_level_nits <= 480))
hdr_multx100_int = sdr_white_level_nits * 100 / 80;
else
hdr_multx100_int = 100; /* default for 80 nits otherwise */
hdr_mult = spl_fixpt_from_fraction((long long)hdr_multx100_int, 100LL);
c0_mult = spl_fixpt_from_fraction(2126LL, 10000LL);
c1_mult = spl_fixpt_from_fraction(7152LL, 10000LL);
c2_mult = spl_fixpt_from_fraction(722LL, 10000LL);
@ -1191,7 +1214,7 @@ static void spl_calculate_c0_c3_hdr(struct dscl_prog_data *dscl_prog_data, uint3
static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *spl_out, bool enable_easf_v,
bool enable_easf_h, enum linear_light_scaling lls_pref,
enum spl_pixel_format format, enum system_setup setup,
uint32_t hdr_multx100)
uint32_t sdr_white_level_nits)
{
struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data;
if (enable_easf_v) {
@ -1499,7 +1522,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s
dscl_prog_data->easf_ltonl_en = 1; // Linear input
if ((setup == HDR_L) && (spl_is_rgb8(format))) {
/* Calculate C0-C3 coefficients based on HDR multiplier */
spl_calculate_c0_c3_hdr(dscl_prog_data, hdr_multx100);
spl_calculate_c0_c3_hdr(dscl_prog_data, sdr_white_level_nits);
} else { // HDR_L ( DWM ) and SDR_L
dscl_prog_data->easf_matrix_c0 =
0x4EF7; // fp1.5.10, C0 coefficient (LN_rec709: 0.2126 * (2^14)/125 = 27.86590720)
@ -1557,7 +1580,7 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
struct adaptive_sharpness adp_sharpness, bool enable_isharp,
enum linear_light_scaling lls_pref, enum spl_pixel_format format,
const struct spl_scaler_data *data, struct spl_fixed31_32 ratio,
enum system_setup setup)
enum system_setup setup, enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
/* Turn off sharpener if not required */
if (!enable_isharp) {
@ -1565,6 +1588,11 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
return;
}
spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness,
scale_to_sharpness_policy);
dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup);
dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level;
dscl_prog_data->isharp_en = 1; // ISHARP_EN
// Set ISHARP_NOISEDET_MODE if htaps = 6-tap
if (data->taps.h_taps == 6) {
@ -1662,11 +1690,6 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data,
dscl_prog_data->isharp_lba.base_seg[5] = 0; // ISHARP LBA PWL for Seg 5. BASE value in U0.6 format
}
spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness);
dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup);
dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level;
// Program the nldelta soft clip values
if (lls_pref == LLS_PREF_YES) {
dscl_prog_data->isharp_nldelta_sclip.enable_p = 0; /* ISHARP_NLDELTA_SCLIP_EN_P */
@ -1750,7 +1773,7 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
// Set EASF
spl_set_easf_data(&spl_scratch, spl_out, enable_easf_v, enable_easf_h, spl_in->lls_pref,
spl_in->basic_in.format, setup, spl_in->hdr_multx100);
spl_in->basic_in.format, setup, spl_in->sdr_white_level_nits);
// Set iSHARP
vratio = spl_fixpt_ceil(spl_scratch.scl_data.ratios.vert);
@ -1761,7 +1784,8 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out)
isharp_scale_ratio = spl_scratch.scl_data.recip_ratios.horz;
spl_set_isharp_data(spl_out->dscl_prog_data, spl_in->adaptive_sharpness, enable_isharp,
spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup);
spl_in->lls_pref, spl_in->basic_in.format, data, isharp_scale_ratio, setup,
spl_in->debug.scale_to_sharpness_policy);
return res;
}

View File

@ -500,6 +500,15 @@ struct isharp_1D_lut_pregen filter_isharp_1D_lut_pregen[NUM_SHARPNESS_SETUPS] =
},
};
struct scale_ratio_to_sharpness_level_adj sharpness_level_adj[NUM_SHARPNESS_ADJ_LEVELS] = {
{1125, 1000, 0},
{11, 10, 1},
{1075, 1000, 2},
{105, 100, 3},
{1025, 1000, 4},
{1, 1, 5},
};
const uint32_t *spl_get_filter_isharp_1D_lut_0(void)
{
return filter_isharp_1D_lut_0;
@ -541,19 +550,72 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void)
return filter_isharp_bs_3tap_64p_s1_12;
}
static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level, enum system_setup setup,
struct spl_sharpness_range sharpness_range)
static unsigned int spl_calculate_sharpness_level_adj(struct spl_fixed31_32 ratio)
{
int j;
struct spl_fixed31_32 ratio_level;
struct scale_ratio_to_sharpness_level_adj *lookup_ptr;
unsigned int sharpness_level_down_adj;
/*
* Adjust sharpness level based on current scaling ratio
*
* We have 5 discrete scaling ratios which we will use to adjust the
* sharpness level down by 1 as we pass each ratio. The ratios
* are
*
* 1.125 upscale and higher - no adj
* 1.100 - under 1.125 - adj level down 1
* 1.075 - under 1.100 - adj level down 2
* 1.050 - under 1.075 - adj level down 3
* 1.025 - under 1.050 - adj level down 4
* 1.000 - under 1.025 - adj level down 5
*
*/
j = 0;
sharpness_level_down_adj = 0;
lookup_ptr = sharpness_level_adj;
while (j < NUM_SHARPNESS_ADJ_LEVELS) {
ratio_level = spl_fixpt_from_fraction(lookup_ptr->ratio_numer,
lookup_ptr->ratio_denom);
if (ratio.value >= ratio_level.value) {
sharpness_level_down_adj = lookup_ptr->level_down_adj;
break;
}
lookup_ptr++;
j++;
}
return sharpness_level_down_adj;
}
static unsigned int spl_calculate_sharpness_level(struct spl_fixed31_32 ratio,
int discrete_sharpness_level, enum system_setup setup,
struct spl_sharpness_range sharpness_range,
enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
unsigned int sharpness_level = 0;
unsigned int sharpness_level_down_adj = 0;
int min_sharpness, max_sharpness, mid_sharpness;
/*
* Adjust sharpness level if policy requires we adjust it based on
* scale ratio. Based on scale ratio, we may adjust the sharpness
* level down by a certain number of steps. We will not select
* a sharpness value of 0 so the lowest sharpness level will be
* 0 or 1 depending on what the min_sharpness is
*
* If the policy is no required, this code maybe removed at a later
* date
*/
switch (setup) {
case HDR_L:
min_sharpness = sharpness_range.hdr_rgb_min;
max_sharpness = sharpness_range.hdr_rgb_max;
mid_sharpness = sharpness_range.hdr_rgb_mid;
if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL)
sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
case HDR_NL:
/* currently no use case, use Non-linear SDR values for now */
@ -561,15 +623,26 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level,
min_sharpness = sharpness_range.sdr_yuv_min;
max_sharpness = sharpness_range.sdr_yuv_max;
mid_sharpness = sharpness_range.sdr_yuv_mid;
if (scale_to_sharpness_policy >= SCALE_TO_SHARPNESS_ADJ_YUV)
sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
case SDR_L:
default:
min_sharpness = sharpness_range.sdr_rgb_min;
max_sharpness = sharpness_range.sdr_rgb_max;
mid_sharpness = sharpness_range.sdr_rgb_mid;
if (scale_to_sharpness_policy == SCALE_TO_SHARPNESS_ADJ_ALL)
sharpness_level_down_adj = spl_calculate_sharpness_level_adj(ratio);
break;
}
if ((min_sharpness == 0) && (sharpness_level_down_adj >= discrete_sharpness_level))
discrete_sharpness_level = 1;
else if (sharpness_level_down_adj >= discrete_sharpness_level)
discrete_sharpness_level = 0;
else
discrete_sharpness_level -= sharpness_level_down_adj;
int lower_half_step_size = (mid_sharpness - min_sharpness) / 5;
int upper_half_step_size = (max_sharpness - mid_sharpness) / 5;
@ -584,7 +657,7 @@ static unsigned int spl_calculate_sharpness_level(int discrete_sharpness_level,
}
void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
struct adaptive_sharpness sharpness)
struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy)
{
uint8_t *byte_ptr_1dlut_src, *byte_ptr_1dlut_dst;
struct spl_fixed31_32 sharp_base, sharp_calc, sharp_level;
@ -594,8 +667,9 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en
uint32_t filter_pregen_store[ISHARP_LUT_TABLE_SIZE];
/* Custom sharpnessX1000 value */
unsigned int sharpnessX1000 = spl_calculate_sharpness_level(sharpness.sharpness_level,
setup, sharpness.sharpness_range);
unsigned int sharpnessX1000 = spl_calculate_sharpness_level(ratio,
sharpness.sharpness_level, setup,
sharpness.sharpness_range, scale_to_sharpness_policy);
sharp_level = spl_fixpt_from_fraction(sharpnessX1000, 1000);
/*
@ -606,7 +680,6 @@ void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, en
(filter_isharp_1D_lut_pregen[setup].sharpness_denom == 1000))
return;
/*
* Calculate LUT_128_gained with this equation:
*

View File

@ -20,11 +20,11 @@ uint16_t *spl_get_filter_isharp_bs_3tap_64p(void);
const uint16_t *spl_get_filter_isharp_wide_6tap_64p(void);
uint16_t *spl_dscl_get_blur_scale_coeffs_64p(int taps);
struct scale_ratio_to_sharpness_level_lookup {
#define NUM_SHARPNESS_ADJ_LEVELS 6
struct scale_ratio_to_sharpness_level_adj {
unsigned int ratio_numer;
unsigned int ratio_denom;
unsigned int sharpness_numer;
unsigned int sharpness_denom;
unsigned int level_down_adj; /* adjust sharpness level down */
};
struct isharp_1D_lut_pregen {
@ -45,6 +45,7 @@ void spl_init_blur_scale_coeffs(void);
void spl_set_blur_scale_data(struct dscl_prog_data *dscl_prog_data,
const struct spl_scaler_data *data);
void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup, struct adaptive_sharpness sharpness);
void spl_build_isharp_1dlut_from_reference_curve(struct spl_fixed31_32 ratio, enum system_setup setup,
struct adaptive_sharpness sharpness, enum scale_to_sharpness_policy scale_to_sharpness_policy);
uint32_t *spl_get_pregen_filter_isharp_1D_lut(enum system_setup setup);
#endif /* __DC_SPL_ISHARP_FILTERS_H__ */

View File

@ -487,6 +487,17 @@ enum linear_light_scaling { // convert it in translation logic
LLS_PREF_YES,
LLS_PREF_NO
};
enum sharpen_policy {
SHARPEN_ALWAYS = 0,
SHARPEN_YUV = 1,
SHARPEN_RGB_FULLSCREEN_YUV = 2,
SHARPEN_FULLSCREEN_ALL = 3
};
enum scale_to_sharpness_policy {
NO_SCALE_TO_SHARPNESS_ADJ = 0,
SCALE_TO_SHARPNESS_ADJ_YUV = 1,
SCALE_TO_SHARPNESS_ADJ_ALL = 2
};
struct spl_funcs {
void (*spl_calc_lb_num_partitions)
(bool alpha_en,
@ -499,6 +510,8 @@ struct spl_funcs {
struct spl_debug {
int visual_confirm_base_offset;
int visual_confirm_dpp_offset;
enum sharpen_policy sharpen_policy;
enum scale_to_sharpness_policy scale_to_sharpness_policy;
};
struct spl_in {
@ -518,7 +531,7 @@ struct spl_in {
bool is_hdr_on;
int h_active;
int v_active;
int hdr_multx100;
int sdr_white_level_nits;
};
// end of SPL inputs

View File

@ -300,6 +300,7 @@ struct dmub_srv_hw_params {
enum dmub_ips_disable_type disable_ips;
bool disallow_phy_access;
bool disable_sldo_opt;
bool enable_non_transparent_setconfig;
};
/**

View File

@ -682,7 +682,7 @@ union dmub_fw_boot_options {
uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
uint32_t usb4_cm_version: 1; /**< 1 CM support */
uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
uint32_t reserved0: 1;
uint32_t enable_non_transparent_setconfig: 1; /* 1 if dpia use conventional dp lt flow*/
uint32_t disable_clk_ds: 1; /* 1 if disallow dispclk_ds and dppclk_ds*/
uint32_t disable_timeout_recovery : 1; /* 1 if timeout recovery should be disabled */
uint32_t ips_pg_disable: 1; /* 1 to disable ONO domains power gating*/
@ -1308,6 +1308,7 @@ enum dmub_cmd_dpia_type {
DMUB_CMD__DPIA_DIG1_DPIA_CONTROL = 0,
DMUB_CMD__DPIA_SET_CONFIG_ACCESS = 1,
DMUB_CMD__DPIA_MST_ALLOC_SLOTS = 2,
DMUB_CMD__DPIA_SET_TPS_NOTIFICATION = 3,
};
/* DMUB_OUT_CMD__DPIA_NOTIFICATION command types. */
@ -2138,6 +2139,24 @@ struct dmub_rb_cmd_set_mst_alloc_slots {
struct dmub_cmd_mst_alloc_slots_control_data mst_slots_control; /* mst slots control */
};
/**
* Data passed from driver to FW in a DMUB_CMD__SET_TPS_NOTIFICATION command.
*/
struct dmub_cmd_tps_notification_data {
uint8_t instance; /* DPIA instance */
uint8_t tps; /* requested training pattern */
uint8_t reserved1;
uint8_t reserved2;
};
/**
* DMUB command structure for SET_TPS_NOTIFICATION command.
*/
struct dmub_rb_cmd_set_tps_notification {
struct dmub_cmd_header header; /* header */
struct dmub_cmd_tps_notification_data tps_notification; /* set tps_notification data */
};
/**
* DMUB command structure for DPIA HPD int enable control.
*/
@ -5304,6 +5323,10 @@ union dmub_rb_cmd {
* Definition of a DMUB_CMD__DPIA_MST_ALLOC_SLOTS command.
*/
struct dmub_rb_cmd_set_mst_alloc_slots set_mst_alloc_slots;
/**
* Definition of a DMUB_CMD__DPIA_SET_TPS_NOTIFICATION command.
*/
struct dmub_rb_cmd_set_tps_notification set_tps_notification;
/**
* Definition of a DMUB_CMD__EDID_CEA command.
*/

View File

@ -425,6 +425,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
boot_options.bits.ips_disable = params->disable_ips;
boot_options.bits.ips_sequential_ono = params->ips_sequential_ono;
boot_options.bits.disable_sldo_opt = params->disable_sldo_opt;
boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig;
REG_WRITE(DMCUB_SCRATCH14, boot_options.all);
}

View File

@ -134,7 +134,7 @@ unsigned int mod_freesync_calc_v_total_from_refresh(
v_total = div64_u64(div64_u64(((unsigned long long)(
frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)),
stream->timing.h_total), 1000000);
stream->timing.h_total) + 500000, 1000000);
/* v_total cannot be less than nominal */
if (v_total < stream->timing.v_total) {

View File

@ -85,7 +85,7 @@ enum amd_apu_flags {
* @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler
* @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine
* @AMD_IP_BLOCK_TYPE_VPE: Video Processing Engine
* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Schduler for Multimedia
* @AMD_IP_BLOCK_TYPE_UMSCH_MM: User Mode Scheduler for Multimedia
* @AMD_IP_BLOCK_TYPE_ISP: Image Signal Processor
* @AMD_IP_BLOCK_TYPE_NUM: Total number of IP block types
*/

View File

@ -71,6 +71,11 @@ enum kgd_memory_pool {
KGD_POOL_FRAMEBUFFER = 3,
};
struct kfd_cu_occupancy {
u32 wave_cnt;
u32 doorbell_off;
};
/**
* enum kfd_sched_policy
*
@ -313,8 +318,9 @@ struct kfd2kgd_calls {
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
void (*get_cu_occupancy)(struct amdgpu_device *adev,
struct kfd_cu_occupancy *cu_occupancy,
int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);

View File

@ -123,7 +123,7 @@ typedef enum {
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
#define SMU_METRICS_TABLE_VERSION 0xC
#define SMU_METRICS_TABLE_VERSION 0xD
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
@ -227,6 +227,10 @@ typedef struct __attribute__((packed, aligned(4))) {
// PCIE LINK Speed and width
uint32_t PCIeLinkSpeed;
uint32_t PCIeLinkWidth;
// PER XCD ACTIVITY
uint32_t GfxBusy[8];
uint64_t GfxBusyAcc[8];
} MetricsTableX_t;
typedef struct __attribute__((packed, aligned(4))) {

View File

@ -2569,10 +2569,14 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
}
}
return smu_cmn_send_smc_msg_with_param(smu,
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_SetWorkloadMask,
workload_mask,
NULL);
if (!ret)
smu->workload_mask = workload_mask;
return ret;
}
static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)

View File

@ -2107,8 +2107,12 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_v13_0_6_request_i2c_xfer(smu, req);
if (r)
goto fail;
if (r) {
/* Retry once, in case of an i2c collision */
r = smu_v13_0_6_request_i2c_xfer(smu, req);
if (r)
goto fail;
}
for (c = i = 0; i < num_msgs; i++) {
if (!(msg[i].flags & I2C_M_RD)) {

View File

@ -2501,8 +2501,11 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp
return -EINVAL;
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
1 << workload_type, NULL);
if (ret)
dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
else
smu->workload_mask = (1 << workload_type);
return ret;
}

View File

@ -1861,10 +1861,14 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
if (workload_type < 0)
return -EINVAL;
return smu_cmn_send_smc_msg_with_param(smu,
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_SetWorkloadMask,
1 << workload_type,
NULL);
if (!ret)
smu->workload_mask = 1 << workload_type;
return ret;
}
static int smu_v14_0_2_baco_enter(struct smu_context *smu)