mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-04 04:06:26 +00:00
amd-drm-fixes-6.6-2023-09-13:
amdgpu: - GC 9.4.3 fixes - Fix white screen issues with S/G display on system with >= 64G of ram - Replay fixes - SMU 13.0.6 fixes - AUX backlight fix - NBIO 4.3 SR-IOV fixes for HDP - RAS fixes - DP MST resume fix - Fix segfault on systems with no vbios - DPIA fixes amdkfd: - CWSR grace period fix - Unaligned doorbell fix - CRIU fix for GFX11 - Add missing TLB flush on gfx10 and newer -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZQIRSAAKCRC93/aFa7yZ 2O/nAP4zB0fdLB46Hhz11aYsE9Zghe91b2rcmF4EYpEAQs7awwEAhSjy0Wiy6EYb prEGCdW0O8Tq7fdjr7+JrPmF7dasAQk= =SUbg -----END PGP SIGNATURE----- Merge tag 'amd-drm-fixes-6.6-2023-09-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes amd-drm-fixes-6.6-2023-09-13: amdgpu: - GC 9.4.3 fixes - Fix white screen issues with S/G display on system with >= 64G of ram - Replay fixes - SMU 13.0.6 fixes - AUX backlight fix - NBIO 4.3 SR-IOV fixes for HDP - RAS fixes - DP MST resume fix - Fix segfault on systems with no vbios - DPIA fixes amdkfd: - CWSR grace period fix - Unaligned doorbell fix - CRIU fix for GFX11 - Add missing TLB flush on gfx10 and newer Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230913195009.7714-1-alexander.deucher@amd.com
This commit is contained in:
commit
1216d49178
@ -1293,7 +1293,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
|
||||
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
|
||||
bool amdgpu_device_need_post(struct amdgpu_device *adev);
|
||||
bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
|
||||
bool amdgpu_device_pcie_dynamic_switching_supported(void);
|
||||
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
|
||||
bool amdgpu_device_aspm_support_quirk(void);
|
||||
|
@ -478,7 +478,7 @@ void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *c
|
||||
cu_info->cu_active_number = acu_info.number;
|
||||
cu_info->cu_ao_mask = acu_info.ao_cu_mask;
|
||||
memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
|
||||
sizeof(acu_info.bitmap));
|
||||
sizeof(cu_info->cu_bitmap));
|
||||
cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
|
||||
cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
|
||||
cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
|
||||
|
@ -980,8 +980,7 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data,
|
||||
uint32_t inst)
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
|
@ -55,5 +55,4 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data,
|
||||
uint32_t inst);
|
||||
uint32_t *reg_data);
|
||||
|
@ -1103,8 +1103,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data,
|
||||
uint32_t inst)
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
@ -1120,8 +1119,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
SCH_WAVE,
|
||||
grace_period);
|
||||
|
||||
*reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
|
||||
mmCP_IQ_WAIT_TIME2);
|
||||
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
|
||||
}
|
||||
|
||||
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
|
||||
|
@ -100,5 +100,4 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data,
|
||||
uint32_t inst);
|
||||
uint32_t *reg_data);
|
||||
|
@ -1244,32 +1244,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* On APUs with >= 64GB white flickering has been observed w/ SG enabled.
|
||||
* Disable S/G on such systems until we have a proper fix.
|
||||
* https://gitlab.freedesktop.org/drm/amd/-/issues/2354
|
||||
* https://gitlab.freedesktop.org/drm/amd/-/issues/2735
|
||||
*/
|
||||
bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
|
||||
{
|
||||
switch (amdgpu_sg_display) {
|
||||
case -1:
|
||||
break;
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
if ((totalram_pages() << (PAGE_SHIFT - 10)) +
|
||||
(adev->gmc.real_vram_size / 1024) >= 64000000) {
|
||||
DRM_WARN("Disabling S/G due to >=64GB RAM\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
|
||||
* speed switching. Until we have confirmation from Intel that a specific host
|
||||
|
@ -43,6 +43,7 @@
|
||||
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
|
||||
|
||||
#define AMDGPU_MAX_GC_INSTANCES 8
|
||||
#define KGD_MAX_QUEUES 128
|
||||
|
||||
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
|
||||
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
|
||||
@ -257,7 +258,7 @@ struct amdgpu_cu_info {
|
||||
uint32_t number;
|
||||
uint32_t ao_cu_mask;
|
||||
uint32_t ao_cu_bitmap[4][4];
|
||||
uint32_t bitmap[4][4];
|
||||
uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
|
||||
};
|
||||
|
||||
struct amdgpu_gfx_ras {
|
||||
|
@ -839,7 +839,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
|
||||
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
|
||||
memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
|
||||
sizeof(adev->gfx.cu_info.bitmap));
|
||||
sizeof(dev_info->cu_bitmap));
|
||||
dev_info->vram_type = adev->gmc.vram_type;
|
||||
dev_info->vram_bit_width = adev->gmc.vram_width;
|
||||
dev_info->vce_harvest_config = adev->vce.harvest_config;
|
||||
@ -940,12 +940,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
struct atom_context *atom_context;
|
||||
|
||||
atom_context = adev->mode_info.atom_context;
|
||||
memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name));
|
||||
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn));
|
||||
vbios_info.version = atom_context->version;
|
||||
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
|
||||
sizeof(atom_context->vbios_ver_str));
|
||||
memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date));
|
||||
if (atom_context) {
|
||||
memcpy(vbios_info.name, atom_context->name,
|
||||
sizeof(atom_context->name));
|
||||
memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
|
||||
sizeof(atom_context->vbios_pn));
|
||||
vbios_info.version = atom_context->version;
|
||||
memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
|
||||
sizeof(atom_context->vbios_ver_str));
|
||||
memcpy(vbios_info.date, atom_context->date,
|
||||
sizeof(atom_context->date));
|
||||
}
|
||||
|
||||
return copy_to_user(out, &vbios_info,
|
||||
min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
|
||||
|
@ -1052,7 +1052,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
|
||||
info->ce_count = obj->err_data.ce_count;
|
||||
|
||||
if (err_data.ce_count) {
|
||||
if (adev->smuio.funcs &&
|
||||
if (!adev->aid_mask &&
|
||||
adev->smuio.funcs &&
|
||||
adev->smuio.funcs->get_socket_id &&
|
||||
adev->smuio.funcs->get_die_id) {
|
||||
dev_info(adev->dev, "socket: %d, die: %d "
|
||||
@ -1072,7 +1073,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
|
||||
}
|
||||
}
|
||||
if (err_data.ue_count) {
|
||||
if (adev->smuio.funcs &&
|
||||
if (!adev->aid_mask &&
|
||||
adev->smuio.funcs &&
|
||||
adev->smuio.funcs->get_socket_id &&
|
||||
adev->smuio.funcs->get_die_id) {
|
||||
dev_info(adev->dev, "socket: %d, die: %d "
|
||||
|
@ -9449,7 +9449,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
|
||||
gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
|
||||
adev, disable_masks[i * 2 + j]);
|
||||
bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
|
||||
cu_info->bitmap[i][j] = bitmap;
|
||||
cu_info->bitmap[0][i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
|
@ -6368,7 +6368,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
|
||||
* SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
|
||||
* SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
|
||||
*/
|
||||
cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap;
|
||||
cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask)
|
||||
|
@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
|
||||
gfx_v6_0_set_user_cu_inactive_bitmap(
|
||||
adev, disable_masks[i * 2 + j]);
|
||||
bitmap = gfx_v6_0_get_cu_enabled(adev);
|
||||
cu_info->bitmap[i][j] = bitmap;
|
||||
cu_info->bitmap[0][i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
|
@ -5119,7 +5119,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
|
||||
gfx_v7_0_set_user_cu_inactive_bitmap(
|
||||
adev, disable_masks[i * 2 + j]);
|
||||
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
|
||||
cu_info->bitmap[i][j] = bitmap;
|
||||
cu_info->bitmap[0][i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
|
@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
|
||||
gfx_v8_0_set_user_cu_inactive_bitmap(
|
||||
adev, disable_masks[i * 2 + j]);
|
||||
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
|
||||
cu_info->bitmap[i][j] = bitmap;
|
||||
cu_info->bitmap[0][i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
|
||||
if (bitmap & mask) {
|
||||
|
@ -1499,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
|
||||
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
|
||||
if (cu_info->bitmap[i][j] & mask) {
|
||||
if (cu_info->bitmap[0][i][j] & mask) {
|
||||
if (counter == pg_always_on_cu_num)
|
||||
WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
|
||||
if (counter < always_on_cu_num)
|
||||
@ -7233,7 +7233,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
|
||||
* SE6,SH0 --> bitmap[2][1]
|
||||
* SE7,SH0 --> bitmap[3][1]
|
||||
*/
|
||||
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
|
||||
cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
|
||||
if (bitmap & mask) {
|
||||
|
@ -4259,7 +4259,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
|
||||
}
|
||||
|
||||
static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||
u32 bitmap)
|
||||
u32 bitmap, int xcc_id)
|
||||
{
|
||||
u32 data;
|
||||
|
||||
@ -4269,15 +4269,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
|
||||
data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
|
||||
data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
|
||||
|
||||
WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data);
|
||||
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
|
||||
}
|
||||
|
||||
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
|
||||
static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id)
|
||||
{
|
||||
u32 data, mask;
|
||||
|
||||
data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG);
|
||||
data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG);
|
||||
data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
|
||||
data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
|
||||
|
||||
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
|
||||
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
|
||||
@ -4290,7 +4290,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev)
|
||||
static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
|
||||
struct amdgpu_cu_info *cu_info)
|
||||
{
|
||||
int i, j, k, counter, active_cu_number = 0;
|
||||
int i, j, k, counter, xcc_id, active_cu_number = 0;
|
||||
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
|
||||
unsigned disable_masks[4 * 4];
|
||||
|
||||
@ -4309,46 +4309,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
|
||||
adev->gfx.config.max_sh_per_se);
|
||||
|
||||
mutex_lock(&adev->grbm_idx_mutex);
|
||||
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
|
||||
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
|
||||
mask = 1;
|
||||
ao_bitmap = 0;
|
||||
counter = 0;
|
||||
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0);
|
||||
gfx_v9_4_3_set_user_cu_inactive_bitmap(
|
||||
adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
|
||||
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
|
||||
for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
|
||||
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
|
||||
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
|
||||
mask = 1;
|
||||
ao_bitmap = 0;
|
||||
counter = 0;
|
||||
gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
|
||||
gfx_v9_4_3_set_user_cu_inactive_bitmap(
|
||||
adev,
|
||||
disable_masks[i * adev->gfx.config.max_sh_per_se + j],
|
||||
xcc_id);
|
||||
bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id);
|
||||
|
||||
/*
|
||||
* The bitmap(and ao_cu_bitmap) in cu_info structure is
|
||||
* 4x4 size array, and it's usually suitable for Vega
|
||||
* ASICs which has 4*2 SE/SH layout.
|
||||
* But for Arcturus, SE/SH layout is changed to 8*1.
|
||||
* To mostly reduce the impact, we make it compatible
|
||||
* with current bitmap array as below:
|
||||
* SE4,SH0 --> bitmap[0][1]
|
||||
* SE5,SH0 --> bitmap[1][1]
|
||||
* SE6,SH0 --> bitmap[2][1]
|
||||
* SE7,SH0 --> bitmap[3][1]
|
||||
*/
|
||||
cu_info->bitmap[i % 4][j + i / 4] = bitmap;
|
||||
cu_info->bitmap[xcc_id][i][j] = bitmap;
|
||||
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
if (counter < adev->gfx.config.max_cu_per_sh)
|
||||
ao_bitmap |= mask;
|
||||
counter++;
|
||||
for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
|
||||
if (bitmap & mask) {
|
||||
if (counter < adev->gfx.config.max_cu_per_sh)
|
||||
ao_bitmap |= mask;
|
||||
counter++;
|
||||
}
|
||||
mask <<= 1;
|
||||
}
|
||||
mask <<= 1;
|
||||
active_cu_number += counter;
|
||||
if (i < 2 && j < 2)
|
||||
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
|
||||
cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
|
||||
}
|
||||
active_cu_number += counter;
|
||||
if (i < 2 && j < 2)
|
||||
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
|
||||
cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
|
||||
}
|
||||
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
xcc_id);
|
||||
}
|
||||
gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0);
|
||||
mutex_unlock(&adev->grbm_idx_mutex);
|
||||
|
||||
cu_info->number = active_cu_number;
|
||||
|
@ -345,6 +345,9 @@ static void nbio_v4_3_init_registers(struct amdgpu_device *adev)
|
||||
data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK;
|
||||
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data);
|
||||
}
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
|
||||
regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
|
||||
}
|
||||
|
||||
static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
|
||||
|
@ -766,7 +766,7 @@ static int soc21_common_hw_init(void *handle)
|
||||
* for the purpose of expose those registers
|
||||
* to process space
|
||||
*/
|
||||
if (adev->nbio.funcs->remap_hdp_registers)
|
||||
if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
|
||||
adev->nbio.funcs->remap_hdp_registers(adev);
|
||||
/* enable the doorbell aperture */
|
||||
adev->nbio.funcs->enable_doorbell_aperture(adev, true);
|
||||
|
@ -2087,7 +2087,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
|
||||
|
||||
amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
|
||||
cu->num_simd_per_cu = cu_info.simd_per_cu;
|
||||
cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
|
||||
cu->num_simd_cores = cu_info.simd_per_cu *
|
||||
(cu_info.cu_active_number / kdev->kfd->num_nodes);
|
||||
cu->max_waves_simd = cu_info.max_waves_per_simd;
|
||||
|
||||
cu->wave_front_size = cu_info.wave_front_size;
|
||||
|
@ -79,6 +79,10 @@ struct crat_header {
|
||||
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
|
||||
#define CRAT_SUBTYPE_MAX 6
|
||||
|
||||
/*
|
||||
* Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
|
||||
* as it breaks the ABI.
|
||||
*/
|
||||
#define CRAT_SIBLINGMAP_SIZE 32
|
||||
|
||||
/*
|
||||
|
@ -1677,8 +1677,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
|
||||
dqm->dev->kfd2kgd->build_grace_period_packet_info(
|
||||
dqm->dev->adev, dqm->wait_times,
|
||||
grace_period, &reg_offset,
|
||||
&dqm->wait_times,
|
||||
ffs(dqm->dev->xcc_mask) - 1);
|
||||
&dqm->wait_times);
|
||||
}
|
||||
|
||||
dqm_unlock(dqm);
|
||||
|
@ -162,6 +162,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
|
||||
return NULL;
|
||||
|
||||
*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
|
||||
inx *= 2;
|
||||
|
||||
pr_debug("Get kernel queue doorbell\n"
|
||||
" doorbell offset == 0x%08X\n"
|
||||
@ -176,6 +177,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
|
||||
unsigned int inx;
|
||||
|
||||
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
|
||||
inx /= 2;
|
||||
|
||||
mutex_lock(&kfd->doorbell_mutex);
|
||||
__clear_bit(inx, kfd->doorbell_bitmap);
|
||||
|
@ -97,18 +97,22 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||
uint32_t *se_mask)
|
||||
uint32_t *se_mask, uint32_t inst)
|
||||
{
|
||||
struct kfd_cu_info cu_info;
|
||||
uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
|
||||
bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
|
||||
uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
|
||||
int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 2 : 1;
|
||||
int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
|
||||
uint32_t cu_active_per_node;
|
||||
int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
|
||||
int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
|
||||
|
||||
amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
|
||||
|
||||
if (cu_mask_count > cu_info.cu_active_number)
|
||||
cu_mask_count = cu_info.cu_active_number;
|
||||
cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
|
||||
if (cu_mask_count > cu_active_per_node)
|
||||
cu_mask_count = cu_active_per_node;
|
||||
|
||||
/* Exceeding these bounds corrupts the stack and indicates a coding error.
|
||||
* Returning with no CU's enabled will hang the queue, which should be
|
||||
@ -141,7 +145,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
for (se = 0; se < cu_info.num_shader_engines; se++)
|
||||
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
|
||||
cu_per_sh[se][sh] = hweight32(
|
||||
cu_info.cu_bitmap[se % 4][sh + (se / 4) * cu_bitmap_sh_mul]);
|
||||
cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
|
||||
cu_bitmap_sh_mul]);
|
||||
|
||||
/* Symmetrically map cu_mask to all SEs & SHs:
|
||||
* se_mask programs up to 2 SH in the upper and lower 16 bits.
|
||||
@ -164,20 +169,33 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
* cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
|
||||
* ...
|
||||
*
|
||||
* For GFX 9.4.3, the following code only looks at a
|
||||
* subset of the cu_mask corresponding to the inst parameter.
|
||||
* If we have n XCCs under one GPU node
|
||||
* cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
|
||||
* cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
|
||||
* ..
|
||||
* cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
|
||||
* cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
|
||||
*
|
||||
* For example, if there are 6 XCCs under 1 KFD node, this code
|
||||
* running for each inst, will look at the bits as:
|
||||
* inst, inst + 6, inst + 12...
|
||||
*
|
||||
* First ensure all CUs are disabled, then enable user specified CUs.
|
||||
*/
|
||||
for (i = 0; i < cu_info.num_shader_engines; i++)
|
||||
se_mask[i] = 0;
|
||||
|
||||
i = 0;
|
||||
for (cu = 0; cu < 16; cu += inc) {
|
||||
i = inst;
|
||||
for (cu = 0; cu < 16; cu += cu_inc) {
|
||||
for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
|
||||
for (se = 0; se < cu_info.num_shader_engines; se++) {
|
||||
if (cu_per_sh[se][sh] > cu) {
|
||||
if (cu_mask[i / 32] & (en_mask << (i % 32)))
|
||||
se_mask[se] |= en_mask << (cu + sh * 16);
|
||||
i += inc;
|
||||
if (i == cu_mask_count)
|
||||
if (i >= cu_mask_count)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
|
||||
|
||||
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
|
||||
const uint32_t *cu_mask, uint32_t cu_mask_count,
|
||||
uint32_t *se_mask);
|
||||
uint32_t *se_mask, uint32_t inst);
|
||||
|
||||
int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
|
||||
uint32_t pipe_id, uint32_t queue_id,
|
||||
|
@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
return;
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
|
@ -52,7 +52,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
return;
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
|
@ -71,7 +71,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
}
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
|
||||
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
m->compute_static_thread_mgmt_se1 = se_mask[1];
|
||||
@ -321,6 +321,43 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
|
||||
{
|
||||
struct v11_compute_mqd *m;
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
|
||||
}
|
||||
|
||||
static void restore_mqd(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *qp,
|
||||
const void *mqd_src,
|
||||
const void *ctl_stack_src, const u32 ctl_stack_size)
|
||||
{
|
||||
uint64_t addr;
|
||||
struct v11_compute_mqd *m;
|
||||
|
||||
m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
|
||||
addr = mqd_mem_obj->gpu_addr;
|
||||
|
||||
memcpy(m, mqd_src, sizeof(*m));
|
||||
|
||||
*mqd = m;
|
||||
if (gart_addr)
|
||||
*gart_addr = addr;
|
||||
|
||||
m->cp_hqd_pq_doorbell_control =
|
||||
qp->doorbell_off <<
|
||||
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
|
||||
pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
|
||||
m->cp_hqd_pq_doorbell_control);
|
||||
|
||||
qp->is_active = 0;
|
||||
}
|
||||
|
||||
|
||||
static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
|
||||
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
|
||||
struct queue_properties *q)
|
||||
@ -458,6 +495,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
|
||||
mqd->mqd_size = sizeof(struct v11_compute_mqd);
|
||||
mqd->get_wave_state = get_wave_state;
|
||||
mqd->mqd_stride = kfd_mqd_stride;
|
||||
mqd->checkpoint_mqd = checkpoint_mqd;
|
||||
mqd->restore_mqd = restore_mqd;
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
mqd->debugfs_show_mqd = debugfs_show_mqd;
|
||||
#endif
|
||||
@ -502,6 +541,8 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
|
||||
mqd->update_mqd = update_mqd_sdma;
|
||||
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
|
||||
mqd->is_occupied = kfd_is_occupied_sdma;
|
||||
mqd->checkpoint_mqd = checkpoint_mqd;
|
||||
mqd->restore_mqd = restore_mqd;
|
||||
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
|
||||
mqd->mqd_stride = kfd_mqd_stride;
|
||||
#if defined(CONFIG_DEBUG_FS)
|
||||
|
@ -60,7 +60,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
|
||||
}
|
||||
|
||||
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
struct mqd_update_info *minfo)
|
||||
struct mqd_update_info *minfo, uint32_t inst)
|
||||
{
|
||||
struct v9_mqd *m;
|
||||
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
|
||||
@ -69,27 +69,36 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
return;
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
m->compute_static_thread_mgmt_se1 = se_mask[1];
|
||||
m->compute_static_thread_mgmt_se2 = se_mask[2];
|
||||
m->compute_static_thread_mgmt_se3 = se_mask[3];
|
||||
m->compute_static_thread_mgmt_se4 = se_mask[4];
|
||||
m->compute_static_thread_mgmt_se5 = se_mask[5];
|
||||
m->compute_static_thread_mgmt_se6 = se_mask[6];
|
||||
m->compute_static_thread_mgmt_se7 = se_mask[7];
|
||||
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
|
||||
m->compute_static_thread_mgmt_se4 = se_mask[4];
|
||||
m->compute_static_thread_mgmt_se5 = se_mask[5];
|
||||
m->compute_static_thread_mgmt_se6 = se_mask[6];
|
||||
m->compute_static_thread_mgmt_se7 = se_mask[7];
|
||||
|
||||
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
|
||||
m->compute_static_thread_mgmt_se0,
|
||||
m->compute_static_thread_mgmt_se1,
|
||||
m->compute_static_thread_mgmt_se2,
|
||||
m->compute_static_thread_mgmt_se3,
|
||||
m->compute_static_thread_mgmt_se4,
|
||||
m->compute_static_thread_mgmt_se5,
|
||||
m->compute_static_thread_mgmt_se6,
|
||||
m->compute_static_thread_mgmt_se7);
|
||||
pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
|
||||
m->compute_static_thread_mgmt_se0,
|
||||
m->compute_static_thread_mgmt_se1,
|
||||
m->compute_static_thread_mgmt_se2,
|
||||
m->compute_static_thread_mgmt_se3,
|
||||
m->compute_static_thread_mgmt_se4,
|
||||
m->compute_static_thread_mgmt_se5,
|
||||
m->compute_static_thread_mgmt_se6,
|
||||
m->compute_static_thread_mgmt_se7);
|
||||
} else {
|
||||
pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
|
||||
inst, m->compute_static_thread_mgmt_se0,
|
||||
m->compute_static_thread_mgmt_se1,
|
||||
m->compute_static_thread_mgmt_se2,
|
||||
m->compute_static_thread_mgmt_se3);
|
||||
}
|
||||
}
|
||||
|
||||
static void set_priority(struct v9_mqd *m, struct queue_properties *q)
|
||||
@ -290,7 +299,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
|
||||
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
|
||||
m->cp_hqd_ctx_save_control = 0;
|
||||
|
||||
update_cu_mask(mm, mqd, minfo);
|
||||
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
|
||||
update_cu_mask(mm, mqd, minfo, 0);
|
||||
set_priority(m, q);
|
||||
|
||||
q->is_active = QUEUE_IS_ACTIVE(*q);
|
||||
@ -676,6 +686,8 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
|
||||
m = get_mqd(mqd + size * xcc);
|
||||
update_mqd(mm, m, q, minfo);
|
||||
|
||||
update_cu_mask(mm, mqd, minfo, xcc);
|
||||
|
||||
if (q->format == KFD_QUEUE_FORMAT_AQL) {
|
||||
switch (xcc) {
|
||||
case 0:
|
||||
|
@ -55,7 +55,7 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
|
||||
return;
|
||||
|
||||
mqd_symmetrically_map_cu_mask(mm,
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
|
||||
minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
|
||||
|
||||
m = get_mqd(mqd);
|
||||
m->compute_static_thread_mgmt_se0 = se_mask[0];
|
||||
|
@ -299,8 +299,7 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
|
||||
pm->dqm->wait_times,
|
||||
grace_period,
|
||||
&reg_offset,
|
||||
&reg_data,
|
||||
0);
|
||||
&reg_data);
|
||||
|
||||
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
|
||||
reg_data = pm->dqm->wait_times;
|
||||
|
@ -1466,8 +1466,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
|
||||
|
||||
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
|
||||
{
|
||||
return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
|
||||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
|
||||
return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
|
||||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
|
||||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
|
||||
}
|
||||
|
@ -450,8 +450,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
|
||||
sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
|
||||
dev->node_props.cpu_cores_count);
|
||||
sysfs_show_32bit_prop(buffer, offs, "simd_count",
|
||||
dev->gpu ? (dev->node_props.simd_count *
|
||||
NUM_XCC(dev->gpu->xcc_mask)) : 0);
|
||||
dev->gpu ? dev->node_props.simd_count : 0);
|
||||
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
|
||||
dev->node_props.mem_banks_count);
|
||||
sysfs_show_32bit_prop(buffer, offs, "caches_count",
|
||||
@ -1597,14 +1596,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
|
||||
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
|
||||
struct kfd_gpu_cache_info *pcache_info,
|
||||
struct kfd_cu_info *cu_info,
|
||||
int cache_type, unsigned int cu_processor_id)
|
||||
int cache_type, unsigned int cu_processor_id,
|
||||
struct kfd_node *knode)
|
||||
{
|
||||
unsigned int cu_sibling_map_mask;
|
||||
int first_active_cu;
|
||||
int i, j, k;
|
||||
int i, j, k, xcc, start, end;
|
||||
struct kfd_cache_properties *pcache = NULL;
|
||||
|
||||
cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
|
||||
start = ffs(knode->xcc_mask) - 1;
|
||||
end = start + NUM_XCC(knode->xcc_mask);
|
||||
cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
|
||||
cu_sibling_map_mask &=
|
||||
((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||
first_active_cu = ffs(cu_sibling_map_mask);
|
||||
@ -1639,16 +1641,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
|
||||
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
|
||||
k = 0;
|
||||
|
||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
|
||||
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
|
||||
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
||||
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
||||
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
||||
k += 4;
|
||||
for (xcc = start; xcc < end; xcc++) {
|
||||
for (i = 0; i < cu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
|
||||
pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
|
||||
pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
|
||||
pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
|
||||
pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
|
||||
k += 4;
|
||||
|
||||
cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
|
||||
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||
cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
|
||||
cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
pcache->sibling_map_size = k;
|
||||
@ -1666,7 +1670,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
|
||||
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
|
||||
{
|
||||
struct kfd_gpu_cache_info *pcache_info = NULL;
|
||||
int i, j, k;
|
||||
int i, j, k, xcc, start, end;
|
||||
int ct = 0;
|
||||
unsigned int cu_processor_id;
|
||||
int ret;
|
||||
@ -1700,37 +1704,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
|
||||
* then it will consider only one CU from
|
||||
* the shared unit
|
||||
*/
|
||||
start = ffs(kdev->xcc_mask) - 1;
|
||||
end = start + NUM_XCC(kdev->xcc_mask);
|
||||
|
||||
for (ct = 0; ct < num_of_cache_types; ct++) {
|
||||
cu_processor_id = gpu_processor_id;
|
||||
if (pcache_info[ct].cache_level == 1) {
|
||||
for (i = 0; i < pcu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
|
||||
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
|
||||
for (xcc = start; xcc < end; xcc++) {
|
||||
for (i = 0; i < pcu_info->num_shader_engines; i++) {
|
||||
for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
|
||||
for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
|
||||
|
||||
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
|
||||
pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
|
||||
ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
|
||||
pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
|
||||
cu_processor_id, k);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (!ret) {
|
||||
num_of_entries++;
|
||||
list_add_tail(&props_ext->list, &dev->cache_props);
|
||||
if (!ret) {
|
||||
num_of_entries++;
|
||||
list_add_tail(&props_ext->list, &dev->cache_props);
|
||||
}
|
||||
|
||||
/* Move to next CU block */
|
||||
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
|
||||
pcu_info->num_cu_per_sh) ?
|
||||
pcache_info[ct].num_cu_shared :
|
||||
(pcu_info->num_cu_per_sh - k);
|
||||
cu_processor_id += num_cu_shared;
|
||||
}
|
||||
|
||||
/* Move to next CU block */
|
||||
num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
|
||||
pcu_info->num_cu_per_sh) ?
|
||||
pcache_info[ct].num_cu_shared :
|
||||
(pcu_info->num_cu_per_sh - k);
|
||||
cu_processor_id += num_cu_shared;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
|
||||
pcu_info, ct, cu_processor_id);
|
||||
pcu_info, ct, cu_processor_id, kdev);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
@ -89,7 +89,7 @@ struct kfd_mem_properties {
|
||||
struct attribute attr;
|
||||
};
|
||||
|
||||
#define CACHE_SIBLINGMAP_SIZE 64
|
||||
#define CACHE_SIBLINGMAP_SIZE 128
|
||||
|
||||
struct kfd_cache_properties {
|
||||
struct list_head list;
|
||||
|
@ -1274,11 +1274,15 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
|
||||
|
||||
pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
|
||||
|
||||
page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF;
|
||||
page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12);
|
||||
page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF;
|
||||
page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12);
|
||||
page_table_base.high_part = upper_32_bits(pt_base) & 0xF;
|
||||
page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >>
|
||||
AMDGPU_GPU_PAGE_SHIFT);
|
||||
page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >>
|
||||
AMDGPU_GPU_PAGE_SHIFT);
|
||||
page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >>
|
||||
AMDGPU_GPU_PAGE_SHIFT);
|
||||
page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >>
|
||||
AMDGPU_GPU_PAGE_SHIFT);
|
||||
page_table_base.high_part = upper_32_bits(pt_base);
|
||||
page_table_base.low_part = lower_32_bits(pt_base);
|
||||
|
||||
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
|
||||
@ -1640,8 +1644,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (init_data.flags.gpu_vm_support)
|
||||
init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
|
||||
if (init_data.flags.gpu_vm_support &&
|
||||
(amdgpu_sg_display == 0))
|
||||
init_data.flags.gpu_vm_support = false;
|
||||
|
||||
if (init_data.flags.gpu_vm_support)
|
||||
adev->mode_info.gpu_vm_support = true;
|
||||
@ -2335,14 +2340,62 @@ static int dm_late_init(void *handle)
|
||||
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
|
||||
}
|
||||
|
||||
static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
|
||||
{
|
||||
int ret;
|
||||
u8 guid[16];
|
||||
u64 tmp64;
|
||||
|
||||
mutex_lock(&mgr->lock);
|
||||
if (!mgr->mst_primary)
|
||||
goto out_fail;
|
||||
|
||||
if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
|
||||
drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
|
||||
DP_MST_EN |
|
||||
DP_UP_REQ_EN |
|
||||
DP_UPSTREAM_IS_SRC);
|
||||
if (ret < 0) {
|
||||
drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
/* Some hubs forget their guids after they resume */
|
||||
ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
|
||||
if (ret != 16) {
|
||||
drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
if (memchr_inv(guid, 0, 16) == NULL) {
|
||||
tmp64 = get_jiffies_64();
|
||||
memcpy(&guid[0], &tmp64, sizeof(u64));
|
||||
memcpy(&guid[8], &tmp64, sizeof(u64));
|
||||
|
||||
ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16);
|
||||
|
||||
if (ret != 16) {
|
||||
drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
|
||||
goto out_fail;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(mgr->mst_primary->guid, guid, 16);
|
||||
|
||||
out_fail:
|
||||
mutex_unlock(&mgr->lock);
|
||||
}
|
||||
|
||||
static void s3_handle_mst(struct drm_device *dev, bool suspend)
|
||||
{
|
||||
struct amdgpu_dm_connector *aconnector;
|
||||
struct drm_connector *connector;
|
||||
struct drm_connector_list_iter iter;
|
||||
struct drm_dp_mst_topology_mgr *mgr;
|
||||
int ret;
|
||||
bool need_hotplug = false;
|
||||
|
||||
drm_connector_list_iter_begin(dev, &iter);
|
||||
drm_for_each_connector_iter(connector, &iter) {
|
||||
@ -2364,18 +2417,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
|
||||
if (!dp_is_lttpr_present(aconnector->dc_link))
|
||||
try_to_configure_aux_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
|
||||
|
||||
ret = drm_dp_mst_topology_mgr_resume(mgr, true);
|
||||
if (ret < 0) {
|
||||
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
|
||||
aconnector->dc_link);
|
||||
need_hotplug = true;
|
||||
}
|
||||
/* TODO: move resume_mst_branch_status() into drm mst resume again
|
||||
* once topology probing work is pulled out from mst resume into mst
|
||||
* resume 2nd step. mst resume 2nd step should be called after old
|
||||
* state getting restored (i.e. drm_atomic_helper_resume()).
|
||||
*/
|
||||
resume_mst_branch_status(mgr);
|
||||
}
|
||||
}
|
||||
drm_connector_list_iter_end(&iter);
|
||||
|
||||
if (need_hotplug)
|
||||
drm_kms_helper_hotplug_event(dev);
|
||||
}
|
||||
|
||||
static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
|
||||
@ -2769,7 +2819,8 @@ static int dm_resume(void *handle)
|
||||
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
|
||||
enum dc_connection_type new_connection_type = dc_connection_none;
|
||||
struct dc_state *dc_state;
|
||||
int i, r, j;
|
||||
int i, r, j, ret;
|
||||
bool need_hotplug = false;
|
||||
|
||||
if (amdgpu_in_reset(adev)) {
|
||||
dc_state = dm->cached_dc_state;
|
||||
@ -2867,7 +2918,7 @@ static int dm_resume(void *handle)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* this is the case when traversing through already created
|
||||
* this is the case when traversing through already created end sink
|
||||
* MST connectors, should be skipped
|
||||
*/
|
||||
if (aconnector && aconnector->mst_root)
|
||||
@ -2927,6 +2978,27 @@ static int dm_resume(void *handle)
|
||||
|
||||
dm->cached_state = NULL;
|
||||
|
||||
/* Do mst topology probing after resuming cached state*/
|
||||
drm_connector_list_iter_begin(ddev, &iter);
|
||||
drm_for_each_connector_iter(connector, &iter) {
|
||||
aconnector = to_amdgpu_dm_connector(connector);
|
||||
if (aconnector->dc_link->type != dc_connection_mst_branch ||
|
||||
aconnector->mst_root)
|
||||
continue;
|
||||
|
||||
ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
|
||||
|
||||
if (ret < 0) {
|
||||
dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
|
||||
aconnector->dc_link);
|
||||
need_hotplug = true;
|
||||
}
|
||||
}
|
||||
drm_connector_list_iter_end(&iter);
|
||||
|
||||
if (need_hotplug)
|
||||
drm_kms_helper_hotplug_event(ddev);
|
||||
|
||||
amdgpu_dm_irq_resume_late(adev);
|
||||
|
||||
amdgpu_dm_smu_write_watermarks_table(adev);
|
||||
@ -8073,7 +8145,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
|
||||
bundle->surface_updates[planes_count].plane_info =
|
||||
&bundle->plane_infos[planes_count];
|
||||
|
||||
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled) {
|
||||
if (acrtc_state->stream->link->psr_settings.psr_feature_enabled ||
|
||||
acrtc_state->stream->link->replay_settings.replay_feature_enabled) {
|
||||
fill_dc_dirty_rects(plane, old_plane_state,
|
||||
new_plane_state, new_crtc_state,
|
||||
&bundle->flip_addrs[planes_count],
|
||||
|
@ -620,7 +620,7 @@ struct amdgpu_hdmi_vsdb_info {
|
||||
unsigned int max_refresh_rate_hz;
|
||||
|
||||
/**
|
||||
* @replay mode: Replay supported
|
||||
* @replay_mode: Replay supported
|
||||
*/
|
||||
bool replay_mode;
|
||||
};
|
||||
|
@ -169,11 +169,23 @@ static void add_link_enc_assignment(
|
||||
/* Return first available DIG link encoder. */
|
||||
static enum engine_id find_first_avail_link_enc(
|
||||
const struct dc_context *ctx,
|
||||
const struct dc_state *state)
|
||||
const struct dc_state *state,
|
||||
enum engine_id eng_id_requested)
|
||||
{
|
||||
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
|
||||
int i;
|
||||
|
||||
if (eng_id_requested != ENGINE_ID_UNKNOWN) {
|
||||
|
||||
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
|
||||
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
|
||||
if (eng_id == eng_id_requested)
|
||||
return eng_id;
|
||||
}
|
||||
}
|
||||
|
||||
eng_id = ENGINE_ID_UNKNOWN;
|
||||
|
||||
for (i = 0; i < ctx->dc->res_pool->res_cap->num_dig_link_enc; i++) {
|
||||
eng_id = state->res_ctx.link_enc_cfg_ctx.link_enc_avail[i];
|
||||
if (eng_id != ENGINE_ID_UNKNOWN)
|
||||
@ -287,7 +299,7 @@ void link_enc_cfg_link_encs_assign(
|
||||
struct dc_stream_state *streams[],
|
||||
uint8_t stream_count)
|
||||
{
|
||||
enum engine_id eng_id = ENGINE_ID_UNKNOWN;
|
||||
enum engine_id eng_id = ENGINE_ID_UNKNOWN, eng_id_req = ENGINE_ID_UNKNOWN;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
@ -377,8 +389,14 @@ void link_enc_cfg_link_encs_assign(
|
||||
* assigned to that endpoint.
|
||||
*/
|
||||
link_enc = get_link_enc_used_by_link(state, stream->link);
|
||||
if (link_enc == NULL)
|
||||
eng_id = find_first_avail_link_enc(stream->ctx, state);
|
||||
if (link_enc == NULL) {
|
||||
|
||||
if (stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
|
||||
stream->link->dpia_preferred_eng_id != ENGINE_ID_UNKNOWN)
|
||||
eng_id_req = stream->link->dpia_preferred_eng_id;
|
||||
|
||||
eng_id = find_first_avail_link_enc(stream->ctx, state, eng_id_req);
|
||||
}
|
||||
else
|
||||
eng_id = link_enc->preferred_engine;
|
||||
|
||||
@ -402,7 +420,9 @@ void link_enc_cfg_link_encs_assign(
|
||||
DC_LOG_DEBUG("%s: CUR %s(%d) - enc_id(%d)\n",
|
||||
__func__,
|
||||
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
|
||||
assignment.ep_id.link_id.enum_id - 1,
|
||||
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
|
||||
assignment.ep_id.link_id.enum_id :
|
||||
assignment.ep_id.link_id.enum_id - 1,
|
||||
assignment.eng_id);
|
||||
}
|
||||
for (i = 0; i < MAX_PIPES; i++) {
|
||||
@ -413,7 +433,9 @@ void link_enc_cfg_link_encs_assign(
|
||||
DC_LOG_DEBUG("%s: NEW %s(%d) - enc_id(%d)\n",
|
||||
__func__,
|
||||
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ? "PHY" : "DPIA",
|
||||
assignment.ep_id.link_id.enum_id - 1,
|
||||
assignment.ep_id.ep_type == DISPLAY_ENDPOINT_PHY ?
|
||||
assignment.ep_id.link_id.enum_id :
|
||||
assignment.ep_id.link_id.enum_id - 1,
|
||||
assignment.eng_id);
|
||||
}
|
||||
|
||||
@ -478,7 +500,6 @@ struct dc_link *link_enc_cfg_get_link_using_link_enc(
|
||||
if (stream)
|
||||
link = stream->link;
|
||||
|
||||
// dm_output_to_console("%s: No link using DIG(%d).\n", __func__, eng_id);
|
||||
return link;
|
||||
}
|
||||
|
||||
|
@ -1496,6 +1496,7 @@ struct dc_link {
|
||||
* object creation.
|
||||
*/
|
||||
enum engine_id eng_id;
|
||||
enum engine_id dpia_preferred_eng_id;
|
||||
|
||||
bool test_pattern_enabled;
|
||||
enum dp_test_pattern current_test_pattern;
|
||||
|
@ -964,7 +964,9 @@ void dce110_edp_backlight_control(
|
||||
return;
|
||||
}
|
||||
|
||||
if (link->panel_cntl) {
|
||||
if (link->panel_cntl && !(link->dpcd_sink_ext_caps.bits.oled ||
|
||||
link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
|
||||
link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
|
||||
bool is_backlight_on = link->panel_cntl->funcs->is_panel_backlight_on(link->panel_cntl);
|
||||
|
||||
if ((enable && is_backlight_on) || (!enable && !is_backlight_on)) {
|
||||
|
@ -1032,6 +1032,28 @@ static const struct dce_i2c_mask i2c_masks = {
|
||||
I2C_COMMON_MASK_SH_LIST_DCN30(_MASK)
|
||||
};
|
||||
|
||||
/* ========================================================== */
|
||||
|
||||
/*
|
||||
* DPIA index | Preferred Encoder | Host Router
|
||||
* 0 | C | 0
|
||||
* 1 | First Available | 0
|
||||
* 2 | D | 1
|
||||
* 3 | First Available | 1
|
||||
*/
|
||||
/* ========================================================== */
|
||||
static const enum engine_id dpia_to_preferred_enc_id_table[] = {
|
||||
ENGINE_ID_DIGC,
|
||||
ENGINE_ID_DIGC,
|
||||
ENGINE_ID_DIGD,
|
||||
ENGINE_ID_DIGD
|
||||
};
|
||||
|
||||
static enum engine_id dcn314_get_preferred_eng_id_dpia(unsigned int dpia_index)
|
||||
{
|
||||
return dpia_to_preferred_enc_id_table[dpia_index];
|
||||
}
|
||||
|
||||
static struct dce_i2c_hw *dcn31_i2c_hw_create(
|
||||
struct dc_context *ctx,
|
||||
uint32_t inst)
|
||||
@ -1785,6 +1807,7 @@ static struct resource_funcs dcn314_res_pool_funcs = {
|
||||
.update_bw_bounding_box = dcn314_update_bw_bounding_box,
|
||||
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
|
||||
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
|
||||
.get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
|
||||
};
|
||||
|
||||
static struct clock_source *dcn30_clock_source_create(
|
||||
|
@ -65,6 +65,7 @@ struct resource_context;
|
||||
struct clk_bw_params;
|
||||
|
||||
struct resource_funcs {
|
||||
enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index);
|
||||
void (*destroy)(struct resource_pool **pool);
|
||||
void (*link_init)(struct dc_link *link);
|
||||
struct panel_cntl*(*panel_cntl_create)(
|
||||
|
@ -791,6 +791,10 @@ static bool construct_dpia(struct dc_link *link,
|
||||
/* Set dpia port index : 0 to number of dpia ports */
|
||||
link->ddc_hw_inst = init_params->connector_index;
|
||||
|
||||
// Assign Dpia preferred eng_id
|
||||
if (link->dc->res_pool->funcs->get_preferred_eng_id_dpia)
|
||||
link->dpia_preferred_eng_id = link->dc->res_pool->funcs->get_preferred_eng_id_dpia(link->ddc_hw_inst);
|
||||
|
||||
/* TODO: Create link encoder */
|
||||
|
||||
link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;
|
||||
|
@ -31,12 +31,12 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/dma-fence.h>
|
||||
#include "amdgpu_irq.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
|
||||
struct pci_dev;
|
||||
struct amdgpu_device;
|
||||
|
||||
#define KGD_MAX_QUEUES 128
|
||||
|
||||
struct kfd_dev;
|
||||
struct kgd_mem;
|
||||
|
||||
@ -68,7 +68,7 @@ struct kfd_cu_info {
|
||||
uint32_t wave_front_size;
|
||||
uint32_t max_scratch_slots_per_cu;
|
||||
uint32_t lds_size;
|
||||
uint32_t cu_bitmap[4][4];
|
||||
uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
|
||||
};
|
||||
|
||||
/* For getting GPU local memory information from KGD */
|
||||
@ -326,8 +326,7 @@ struct kfd2kgd_calls {
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data,
|
||||
uint32_t inst);
|
||||
uint32_t *reg_data);
|
||||
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
|
||||
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
|
||||
void (*program_trap_handler_settings)(struct amdgpu_device *adev,
|
||||
|
@ -336,7 +336,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
|
||||
|
||||
/* Store one-time values in driver PPTable */
|
||||
if (!pptable->Init) {
|
||||
while (retry--) {
|
||||
while (--retry) {
|
||||
ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
Loading…
Reference in New Issue
Block a user