aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorDave Airlie2021-07-08 08:45:20 +1000
committerDave Airlie2021-07-08 08:45:20 +1000
commit0d3a1b37ab931fe31bf740be6fa135d770ade677 (patch)
treef4c074383f9a8602993bd1ca10480a2b01104533 /drivers
parent8a02ea42bc1d4c448caf1bab0e05899dad503f74 (diff)
parent93c5bcd4eaaafd7c25c062089806c86d9b7890dd (diff)
Merge tag 'amd-drm-next-5.14-2021-07-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.14-2021-07-01: amdgpu: - Misc Navi fixes - Powergating fix - Yellow Carp updates - Beige Goby updates - S0ix fix - Revert overlay validation fix - GPU reset fix for DC - PPC64 fix - Add new dimgrey cavefish DID - RAS fix amdkfd: - SVM fixes radeon: - Fix missing drm_gem_object_put in error path Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210701042241.25449-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c266
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c100
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c272
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c236
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h19
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c31
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile8
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c60
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h16
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c5
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h10
-rw-r--r--drivers/gpu/drm/amd/include/navi10_enum.h2
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c95
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c8
40 files changed, 987 insertions, 464 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 130a9adf09ef..d303e88e3c23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1369,6 +1369,38 @@ def_value:
adev->pm.smu_prv_buffer_size = 0;
}
+static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_RAVEN)
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ if (adev->pdev->device == 0x15dd)
+ adev->apu_flags |= AMD_APU_IS_RAVEN;
+ if (adev->pdev->device == 0x15d8)
+ adev->apu_flags |= AMD_APU_IS_PICASSO;
+ break;
+ case CHIP_RENOIR:
+ if ((adev->pdev->device == 0x1636) ||
+ (adev->pdev->device == 0x164c))
+ adev->apu_flags |= AMD_APU_IS_RENOIR;
+ else
+ adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
+ break;
+ case CHIP_VANGOGH:
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case CHIP_YELLOW_CARP:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_device_check_arguments - validate module params
*
@@ -3386,6 +3418,10 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
+ r = amdgpu_device_init_apu_flags(adev);
+ if (r)
+ return r;
+
r = amdgpu_device_check_arguments(adev);
if (r)
return r;
@@ -4304,6 +4340,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
+ case CHIP_BEIGE_GOBY:
case CHIP_VANGOGH:
case CHIP_ALDEBARAN:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6f30c525caac..71beb0db0125 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -160,6 +160,7 @@ int amdgpu_smu_pptable_id = -1;
* highest. That helps saving some idle power.
* DISABLE_FRACTIONAL_PWM (bit 2) disabled by default
* PSR (bit 3) disabled by default
+ * EDP NO POWER SEQUENCING (bit 4) disabled by default
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
@@ -1198,6 +1199,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
/* Aldebaran */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 0174f7817ce2..d0b8d415b63b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -562,6 +562,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case CHIP_NAVI14:
case CHIP_NAVI12:
case CHIP_VANGOGH:
+ case CHIP_YELLOW_CARP:
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index d6c54c7f7679..4b153daf283d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -160,7 +160,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
- bool mmap_locked)
+ bool mmap_locked, void *owner)
{
struct hmm_range *hmm_range;
unsigned long timeout;
@@ -185,6 +185,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
hmm_range->hmm_pfns = pfns;
hmm_range->start = start;
hmm_range->end = start + npages * PAGE_SIZE;
+ hmm_range->dev_private_owner = owner;
/* Assuming 512MB takes maxmium 1 second to fault page address */
timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 7f7d37a457c3..14a3c1864085 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
struct mm_struct *mm, struct page **pages,
uint64_t start, uint64_t npages,
struct hmm_range **phmm_range, bool readonly,
- bool mmap_locked);
+ bool mmap_locked, void *owner);
int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
#if defined(CONFIG_HMM_MIRROR)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 25ee53545837..45295dce5c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -93,6 +93,8 @@ struct amdgpu_nbio_funcs {
void (*enable_aspm)(struct amdgpu_device *adev,
bool enable);
void (*program_aspm)(struct amdgpu_device *adev);
+ void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
+ void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6a214a4dfe04..2e9ad6e0dfbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -590,10 +590,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.offset += adev->gmc.aper_base;
mem->bus.is_iomem = true;
- if (adev->gmc.xgmi.connected_to_cpu)
- mem->bus.caching = ttm_cached;
- else
- mem->bus.caching = ttm_write_combined;
break;
default:
return -EINVAL;
@@ -696,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
readonly = amdgpu_ttm_tt_is_readonly(ttm);
r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
ttm->num_pages, &gtt->range, readonly,
- false);
+ false, NULL);
out_putmm:
mmput(mm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 436ec246a7da..2fd77c36a1ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -463,6 +463,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (i == 1)
node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+ if (adev->gmc.xgmi.connected_to_cpu)
+ node->base.bus.caching = ttm_cached;
+ else
+ node->base.bus.caching = ttm_write_combined;
+
atomic64_add(vis_usage, &mgr->vis_usage);
*res = &node->base;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index 5b90efd6f6d0..3ac505d954c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ if (enable)
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
+ if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
+ return;
+
def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
- (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ if (enable)
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
else
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 2d56b60bc058..f5e9c022960b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5086,47 +5086,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
4 + /* RMI */
1); /* SQG */
- if (adev->asic_type == CHIP_NAVI10 ||
- adev->asic_type == CHIP_NAVI14 ||
- adev->asic_type == CHIP_NAVI12) {
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
- for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
- wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
- /*
- * Set corresponding TCP bits for the inactive WGPs in
- * GCRD_SA_TARGETS_DISABLE
- */
- gcrd_targets_disable_tcp = 0;
- /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
- utcl_invreq_disable = 0;
-
- for (k = 0; k < max_wgp_per_sh; k++) {
- if (!(wgp_active_bitmap & (1 << k))) {
- gcrd_targets_disable_tcp |= 3 << (2 * k);
- utcl_invreq_disable |= (3 << (2 * k)) |
- (3 << (2 * (max_wgp_per_sh + k)));
- }
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ /*
+ * Set corresponding TCP bits for the inactive WGPs in
+ * GCRD_SA_TARGETS_DISABLE
+ */
+ gcrd_targets_disable_tcp = 0;
+ /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+ utcl_invreq_disable = 0;
+
+ for (k = 0; k < max_wgp_per_sh; k++) {
+ if (!(wgp_active_bitmap & (1 << k))) {
+ gcrd_targets_disable_tcp |= 3 << (2 * k);
+ gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
+ utcl_invreq_disable |= (3 << (2 * k)) |
+ (3 << (2 * (max_wgp_per_sh + k)));
}
-
- tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
- /* only override TCP & SQC bits */
- tmp &= 0xffffffff << (4 * max_wgp_per_sh);
- tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
- WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
-
- tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
- /* only override TCP bits */
- tmp &= 0xffffffff << (2 * max_wgp_per_sh);
- tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
- WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
}
- }
- gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
+ tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (4 * max_wgp_per_sh));
+ tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+ WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (3 * max_wgp_per_sh));
+ tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+ WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+ }
}
+
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
}
static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
@@ -7404,7 +7401,10 @@ static int gfx_v10_0_hw_init(void *handle)
* init golden registers and rlc resume may override some registers,
* reconfig them here
*/
- gfx_v10_0_tcp_harvest(adev);
+ if (adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14 ||
+ adev->asic_type == CHIP_NAVI12)
+ gfx_v10_0_tcp_harvest(adev);
r = gfx_v10_0_cp_resume(adev);
if (r)
@@ -7777,6 +7777,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 0 - Disable some blocks' MGCG */
@@ -7791,6 +7794,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
@@ -7813,13 +7817,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
- } else {
+ } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
@@ -7845,22 +7851,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
{
uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
/* Enable 3D CGCG/CGLS */
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ if (enable) {
/* write cmd to clear cgcg/cgls ov */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
/* unset CGCG override */
- data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
/* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@@ -7873,9 +7891,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
} else {
/* Disable CGCG/CGLS */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
/* disable cgcg, cgls should be disabled */
- data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
- RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@@ -7887,25 +7910,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
+ return;
+
+ if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
/* unset CGCG override */
- data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
- else
- data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
/* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
- RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@@ -7917,8 +7950,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
} else {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
/* reset CGCG/CGLS bits */
- data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
/* disable cgcg and cgls in FSM */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@@ -7930,7 +7969,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
{
uint32_t def, data;
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) {
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ if (enable) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset FGCG override */
data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
@@ -7961,6 +8003,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
}
}
+static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t reg_idx = 0;
+ uint32_t i;
+
+ const uint32_t tcp_ctrl_regs[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
+ };
+
+ const uint32_t tcp_ctrl_regs_nv12[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ };
+
+ const uint32_t sm_ctlr_regs[] = {
+ mmCGTS_SA0_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA0_QUAD1_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD1_SM_CTRL_REG
+ };
+
+ if (adev->asic_type == CHIP_NAVI12) {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs_nv12[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
+ sm_ctlr_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
+ reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
+ WREG32(reg_idx, reg_data);
+ }
+}
+
static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -7977,6 +8110,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === CGCG + CGLS === */
gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+
+ if ((adev->asic_type >= CHIP_NAVI10) &&
+ (adev->asic_type <= CHIP_NAVI12))
+ gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
} else {
/* CGCG/CGLS should be disabled before MGCG/MGLS
* === CGCG + CGLS ===
@@ -9324,17 +9461,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
- u32 data, wgp_bitmask;
- data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
- data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ u32 disabled_mask =
+ ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ u32 efuse_setting = 0;
+ u32 vbios_setting = 0;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
- data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
- data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
- wgp_bitmask =
- amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ disabled_mask |= efuse_setting | vbios_setting;
- return (~data) & wgp_bitmask;
+ return (~disabled_mask);
}
static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index 7a15e669b68d..5793977953cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev,
RC_MEM_POWER_SD_EN, 0);
WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
- /* only one clock gating mode (LS/DS/SD) can be enabled */
- if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_LS_EN, enable);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_LS_EN, enable);
- } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_DS_EN, enable);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_DS_EN, enable);
- } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_SD_EN, enable);
- /* RC should not use shut down mode, fallback to ds */
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
- HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_DS_EN, enable);
- }
-
- /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
- * be set for SRAM LS/DS/SD */
- if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
- AMD_CG_SUPPORT_HDP_SD)) {
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
- IPH_MEM_POWER_CTRL_EN, 1);
- hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
- RC_MEM_POWER_CTRL_EN, 1);
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, 1);
+ /* RC should not use shut down mode, fallback to ds or ls if allowed */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ }
+
+ /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+ * be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
}
- WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
-
- /* restore IPH & RC clock override after clock/power mode changing */
- WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1);
+ /* disable IPH & RC clock override after clock/power mode changing */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ IPH_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
}
static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index f7e93bbc4e15..7ded6b2f058e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -568,6 +568,9 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
{
uint32_t def, data, def1, data1;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
@@ -582,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
break;
}
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ if (enable) {
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
@@ -627,6 +630,9 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ return;
+
switch (adev->asic_type) {
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
@@ -639,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
break;
}
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ if (enable)
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 05ddec7ba7e2..7b79eeaa88aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -51,6 +51,8 @@
#define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8
#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288
+
static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
{
WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -218,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
def = data = RREG32_PCIE(smnCPM_CONTROL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@@ -244,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
def = data = RREG32_PCIE(smnPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ if (enable) {
data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
PCIE_CNTL2__MST_MEM_LS_EN_MASK |
PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
@@ -463,6 +471,43 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
WREG32_PCIE(smnPCIE_LC_CNTL3, data);
}
+static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t link_width = 0;
+
+ if (!((adev->asic_type >= CHIP_NAVI10) &&
+ (adev->asic_type <= CHIP_NAVI12)))
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+ >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+
+ /*
+ * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data)
+ * if link_width is 0x3 (x4)
+ */
+ if (0x3 == link_width) {
+ reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+ reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK;
+ reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT);
+ WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data);
+ }
+}
+
+static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+
+ if (adev->asic_type != CHIP_NAVI10)
+ return;
+
+ reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+ reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK;
+ WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
+}
+
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -484,4 +529,6 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
.enable_aspm = nbio_v2_3_enable_aspm,
.program_aspm = nbio_v2_3_program_aspm,
+ .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
+ .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 455d0425787c..94a2c0742ee5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -64,6 +64,13 @@
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
+#define codec_info_build(type, width, height, level) \
+ .codec_type = type,\
+ .max_width = width,\
+ .max_height = height,\
+ .max_pixels_per_frame = height * width,\
+ .max_level = level,
+
static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
@@ -309,6 +316,23 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
.codec_array = sriov_sc_video_codecs_decode_array,
};
+/* Beige Goby*/
+static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array),
+ .codec_array = bg_video_codecs_decode_array,
+};
+
+static const struct amdgpu_video_codecs bg_video_codecs_encode = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -335,6 +359,12 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &sc_video_codecs_decode;
return 0;
+ case CHIP_BEIGE_GOBY:
+ if (encode)
+ *codecs = &bg_video_codecs_encode;
+ else
+ *codecs = &bg_video_codecs_decode;
+ return 0;
case CHIP_NAVI10:
case CHIP_NAVI14:
case CHIP_NAVI12:
@@ -1275,7 +1305,6 @@ static int nv_common_early_init(void *handle)
break;
case CHIP_VANGOGH:
- adev->apu_flags |= AMD_APU_IS_VANGOGH;
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
AMD_CG_SUPPORT_GFX_CP_LS |
@@ -1411,6 +1440,12 @@ static int nv_common_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->nbio.funcs->apply_lc_spc_mode_wa)
+ adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
+
+ if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
+ adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
+
/* enable pcie gen2/3 link */
nv_pcie_gen3_enable(adev);
/* enable aspm */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index ae5464e2535a..8931000dcd41 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -144,7 +144,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
@@ -288,7 +288,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe)
};
@@ -1896,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle)
sdma_v4_0_setup_ulv(adev);
- if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
- adev->sdma.funcs->reset_ras_error_count(adev);
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ if (adev->sdma.funcs &&
+ adev->sdma.funcs->reset_ras_error_count)
+ adev->sdma.funcs->reset_ras_error_count(adev);
+ }
if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
return adev->sdma.funcs->ras_late_init(adev, &ih_info);
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
index e9c474c217ec..b6f1322f908c 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
if (adev->flags & AMD_IS_APU)
return;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ return;
+
def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+ if (enable)
data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
else
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index de85577c9cfd..b02436401d46 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1360,10 +1360,7 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_RAVEN:
adev->asic_funcs = &soc15_asic_funcs;
- if (adev->pdev->device == 0x15dd)
- adev->apu_flags |= AMD_APU_IS_RAVEN;
- if (adev->pdev->device == 0x15d8)
- adev->apu_flags |= AMD_APU_IS_PICASSO;
+
if (adev->rev_id >= 0x8)
adev->apu_flags |= AMD_APU_IS_RAVEN2;
@@ -1455,11 +1452,6 @@ static int soc15_common_early_init(void *handle)
break;
case CHIP_RENOIR:
adev->asic_funcs = &soc15_asic_funcs;
- if ((adev->pdev->device == 0x1636) ||
- (adev->pdev->device == 0x164c))
- adev->apu_flags |= AMD_APU_IS_RENOIR;
- else
- adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
if (adev->apu_flags & AMD_APU_IS_RENOIR)
adev->external_rev_id = adev->rev_id + 0x91;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 2660f03e63a7..dab290a4d19d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -218,7 +218,8 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
struct page *page;
page = pfn_to_page(pfn);
- page->zone_device_data = prange;
+ svm_range_bo_ref(prange->svm_bo);
+ page->zone_device_data = prange->svm_bo;
get_page(page);
lock_page(page);
}
@@ -293,15 +294,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
for (i = j = 0; i < npages; i++) {
struct page *spage;
- dst[i] = cursor.start + (j << PAGE_SHIFT);
- migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
-
- migrate->dst[i] = migrate_pfn(migrate->dst[i]);
- migrate->dst[i] |= MIGRATE_PFN_LOCKED;
-
- if (migrate->src[i] & MIGRATE_PFN_VALID) {
- spage = migrate_pfn_to_page(migrate->src[i]);
+ spage = migrate_pfn_to_page(migrate->src[i]);
+ if (spage && !is_zone_device_page(spage)) {
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ migrate->dst[i] |= MIGRATE_PFN_LOCKED;
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
@@ -355,6 +354,20 @@ out_free_vram_pages:
}
}
+#ifdef DEBUG_FORCE_MIXED_DOMAINS
+ for (i = 0, j = 0; i < npages; i += 4, j++) {
+ if (j & 1)
+ continue;
+ svm_migrate_put_vram_page(adev, dst[i]);
+ migrate->dst[i] = 0;
+ svm_migrate_put_vram_page(adev, dst[i + 1]);
+ migrate->dst[i + 1] = 0;
+ svm_migrate_put_vram_page(adev, dst[i + 2]);
+ migrate->dst[i + 2] = 0;
+ svm_migrate_put_vram_page(adev, dst[i + 3]);
+ migrate->dst[i + 3] = 0;
+ }
+#endif
out:
return r;
}
@@ -365,20 +378,20 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
size_t size;
void *buf;
int r = -ENOMEM;
- int retry = 0;
memset(&migrate, 0, sizeof(migrate));
migrate.vma = vma;
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
- migrate.pgmap_owner = adev;
+ migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
@@ -390,7 +403,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages);
-retry:
r = migrate_vma_setup(&migrate);
if (r) {
pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
@@ -398,17 +410,9 @@ retry:
goto out_free;
}
if (migrate.cpages != npages) {
- pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages,
+ pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
+ migrate.cpages,
npages);
- migrate_vma_finalize(&migrate);
- if (retry++ >= 3) {
- r = -ENOMEM;
- pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n",
- r, prange->svms, prange->start, prange->last);
- goto out_free;
- }
-
- goto retry;
}
if (migrate.cpages) {
@@ -425,6 +429,12 @@ retry:
out_free:
kvfree(buf);
out:
+ if (!r) {
+ pdd = svm_range_get_pdd_by_adev(prange, adev);
+ if (pdd)
+ WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
+ }
+
return r;
}
@@ -464,7 +474,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
prange->start, prange->last, best_loc);
/* FIXME: workaround for page locking bug with invalid pages */
- svm_range_prefault(prange, mm);
+ svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev));
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
@@ -493,15 +503,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
static void svm_migrate_page_free(struct page *page)
{
- /* Keep this function to avoid warning */
+ struct svm_range_bo *svm_bo = page->zone_device_data;
+
+ if (svm_bo) {
+ pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
+ svm_range_bo_unref(svm_bo);
+ }
}
static int
svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, uint64_t npages)
{
- uint64_t npages = migrate->cpages;
struct device *dev = adev->dev;
uint64_t *src;
dma_addr_t *dst;
@@ -518,15 +532,23 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
src = (uint64_t *)(scratch + npages);
dst = scratch;
- for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+ for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
struct page *spage;
spage = migrate_pfn_to_page(migrate->src[i]);
- if (!spage) {
- pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+ if (!spage || !is_zone_device_page(spage)) {
+ pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
prange->svms, prange->start, prange->last);
- r = -ENOMEM;
- goto out_oom;
+ if (j) {
+ r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+ src + i - j, j,
+ FROM_VRAM_TO_RAM,
+ mfence);
+ if (r)
+ goto out_oom;
+ j = 0;
+ }
+ continue;
}
src[i] = svm_migrate_addr(adev, spage);
if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
@@ -559,6 +581,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+ j++;
}
r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
@@ -581,6 +604,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
dma_addr_t *scratch;
@@ -593,7 +617,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.start = start;
migrate.end = end;
migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
- migrate.pgmap_owner = adev;
+ migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t);
size *= npages;
@@ -616,7 +640,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
if (migrate.cpages) {
r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
- scratch);
+ scratch, npages);
migrate_vma_pages(&migrate);
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
@@ -630,6 +654,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
out_free:
kvfree(buf);
out:
+ if (!r) {
+ pdd = svm_range_get_pdd_by_adev(prange, adev);
+ if (pdd)
+ WRITE_ONCE(pdd->page_out,
+ pdd->page_out + migrate.cpages);
+ }
return r;
}
@@ -859,7 +889,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
pgmap->range.start = res->start;
pgmap->range.end = res->end;
pgmap->ops = &svm_migrate_pgmap_ops;
- pgmap->owner = adev;
+ pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6dc22fa1e555..3426743ed228 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -730,6 +730,15 @@ struct kfd_process_device {
* number of CU's a device has along with number of other competing processes
*/
struct attribute attr_cu_occupancy;
+
+ /* sysfs counters for GPU retry fault and page migration tracking */
+ struct kobject *kobj_counters;
+ struct attribute attr_faults;
+ struct attribute attr_page_in;
+ struct attribute attr_page_out;
+ uint64_t faults;
+ uint64_t page_in;
+ uint64_t page_out;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 09b98a83f670..21ec8a18cad2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
return 0;
}
+static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct kfd_process_device *pdd;
+
+ if (!strcmp(attr->name, "faults")) {
+ pdd = container_of(attr, struct kfd_process_device,
+ attr_faults);
+ return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
+ }
+ if (!strcmp(attr->name, "page_in")) {
+ pdd = container_of(attr, struct kfd_process_device,
+ attr_page_in);
+ return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
+ }
+ if (!strcmp(attr->name, "page_out")) {
+ pdd = container_of(attr, struct kfd_process_device,
+ attr_page_out);
+ return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
+ }
+ return 0;
+}
+
static struct attribute attr_queue_size = {
.name = "size",
.mode = KFD_SYSFS_FILE_MODE
@@ -451,13 +474,18 @@ static const struct sysfs_ops procfs_stats_ops = {
.show = kfd_procfs_stats_show,
};
-static struct attribute *procfs_stats_attrs[] = {
- NULL
-};
-
static struct kobj_type procfs_stats_type = {
.sysfs_ops = &procfs_stats_ops,
- .default_attrs = procfs_stats_attrs,
+ .release = kfd_procfs_kobj_release,
+};
+
+static const struct sysfs_ops sysfs_counters_ops = {
+ .show = kfd_sysfs_counters_show,
+};
+
+static struct kobj_type sysfs_counters_type = {
+ .sysfs_ops = &sysfs_counters_ops,
+ .release = kfd_procfs_kobj_release,
};
int kfd_procfs_add_queue(struct queue *q)
@@ -484,34 +512,31 @@ int kfd_procfs_add_queue(struct queue *q)
return 0;
}
-static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
+static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
char *name)
{
- int ret = 0;
+ int ret;
- if (!p || !attr || !name)
- return -EINVAL;
+ if (!kobj || !attr || !name)
+ return;
attr->name = name;
attr->mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(attr);
- ret = sysfs_create_file(p->kobj, attr);
-
- return ret;
+ ret = sysfs_create_file(kobj, attr);
+ if (ret)
+ pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
}
-static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
+static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
{
- int ret = 0;
+ int ret;
int i;
char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
- if (!p)
- return -EINVAL;
-
- if (!p->kobj)
- return -EFAULT;
+ if (!p || !p->kobj)
+ return;
/*
* Create sysfs files for each GPU:
@@ -521,63 +546,87 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
*/
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- struct kobject *kobj_stats;
snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
"stats_%u", pdd->dev->id);
- kobj_stats = kfd_alloc_struct(kobj_stats);
- if (!kobj_stats)
- return -ENOMEM;
+ pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats);
+ if (!pdd->kobj_stats)
+ return;
- ret = kobject_init_and_add(kobj_stats,
- &procfs_stats_type,
- p->kobj,
- stats_dir_filename);
+ ret = kobject_init_and_add(pdd->kobj_stats,
+ &procfs_stats_type,
+ p->kobj,
+ stats_dir_filename);
if (ret) {
pr_warn("Creating KFD proc/stats_%s folder failed",
- stats_dir_filename);
- kobject_put(kobj_stats);
- goto err;
+ stats_dir_filename);
+ kobject_put(pdd->kobj_stats);
+ pdd->kobj_stats = NULL;
+ return;
}
- pdd->kobj_stats = kobj_stats;
- pdd->attr_evict.name = "evicted_ms";
- pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&pdd->attr_evict);
- ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
- if (ret)
- pr_warn("Creating eviction stats for gpuid %d failed",
- (int)pdd->dev->id);
-
+ kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict,
+ "evicted_ms");
/* Add sysfs file to report compute unit occupancy */
- if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
- pdd->attr_cu_occupancy.name = "cu_occupancy";
- pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&pdd->attr_cu_occupancy);
- ret = sysfs_create_file(kobj_stats,
- &pdd->attr_cu_occupancy);
- if (ret)
- pr_warn("Creating %s failed for gpuid: %d",
- pdd->attr_cu_occupancy.name,
- (int)pdd->dev->id);
- }
+ if (pdd->dev->kfd2kgd->get_cu_occupancy)
+ kfd_sysfs_create_file(pdd->kobj_stats,
+ &pdd->attr_cu_occupancy,
+ "cu_occupancy");
}
-err:
- return ret;
}
-
-static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
+static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
{
int ret = 0;
int i;
+ char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
- if (!p)
- return -EINVAL;
+ if (!p || !p->kobj)
+ return;
- if (!p->kobj)
- return -EFAULT;
+ /*
+ * Create sysfs files for each GPU which supports SVM
+ * - proc/<pid>/counters_<gpuid>/
+ * - proc/<pid>/counters_<gpuid>/faults
+ * - proc/<pid>/counters_<gpuid>/page_in
+ * - proc/<pid>/counters_<gpuid>/page_out
+ */
+ for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct kobject *kobj_counters;
+
+ snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
+ "counters_%u", pdd->dev->id);
+ kobj_counters = kfd_alloc_struct(kobj_counters);
+ if (!kobj_counters)
+ return;
+
+ ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
+ p->kobj, counters_dir_filename);
+ if (ret) {
+ pr_warn("Creating KFD proc/%s folder failed",
+ counters_dir_filename);
+ kobject_put(kobj_counters);
+ return;
+ }
+
+ pdd->kobj_counters = kobj_counters;
+ kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
+ "faults");
+ kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
+ "page_in");
+ kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
+ "page_out");
+ }
+}
+
+static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
+{
+ int i;
+
+ if (!p || !p->kobj)
+ return;
/*
* Create sysfs files for each GPU:
@@ -589,20 +638,14 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
pdd->dev->id);
- ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
- if (ret)
- pr_warn("Creating vram usage for gpu id %d failed",
- (int)pdd->dev->id);
+ kfd_sysfs_create_file(p->kobj, &pdd->attr_vram,
+ pdd->vram_filename);
snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
pdd->dev->id);
- ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
- if (ret)
- pr_warn("Creating sdma usage for gpu id %d failed",
- (int)pdd->dev->id);
+ kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma,
+ pdd->sdma_filename);
}
-
- return ret;
}
void kfd_procfs_del_queue(struct queue *q)
@@ -800,28 +843,17 @@ struct kfd_process *kfd_create_process(struct file *filep)
goto out;
}
- process->attr_pasid.name = "pasid";
- process->attr_pasid.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&process->attr_pasid);
- ret = sysfs_create_file(process->kobj, &process->attr_pasid);
- if (ret)
- pr_warn("Creating pasid for pid %d failed",
- (int)process->lead_thread->pid);
+ kfd_sysfs_create_file(process->kobj, &process->attr_pasid,
+ "pasid");
process->kobj_queues = kobject_create_and_add("queues",
process->kobj);
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
- ret = kfd_procfs_add_sysfs_stats(process);
- if (ret)
- pr_warn("Creating sysfs stats dir for pid %d failed",
- (int)process->lead_thread->pid);
-
- ret = kfd_procfs_add_sysfs_files(process);
- if (ret)
- pr_warn("Creating sysfs usage file for pid %d failed",
- (int)process->lead_thread->pid);
+ kfd_procfs_add_sysfs_stats(process);
+ kfd_procfs_add_sysfs_files(process);
+ kfd_procfs_add_sysfs_counters(process);
}
out:
if (!IS_ERR(process))
@@ -964,6 +996,50 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
p->n_pdds = 0;
}
+static void kfd_process_remove_sysfs(struct kfd_process *p)
+{
+ struct kfd_process_device *pdd;
+ int i;
+
+ if (!p->kobj)
+ return;
+
+ sysfs_remove_file(p->kobj, &p->attr_pasid);
+ kobject_del(p->kobj_queues);
+ kobject_put(p->kobj_queues);
+ p->kobj_queues = NULL;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ pdd = p->pdds[i];
+
+ sysfs_remove_file(p->kobj, &pdd->attr_vram);
+ sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+
+ sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
+ if (pdd->dev->kfd2kgd->get_cu_occupancy)
+ sysfs_remove_file(pdd->kobj_stats,
+ &pdd->attr_cu_occupancy);
+ kobject_del(pdd->kobj_stats);
+ kobject_put(pdd->kobj_stats);
+ pdd->kobj_stats = NULL;
+ }
+
+ for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+ pdd = p->pdds[i];
+
+ sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
+ sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
+ sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
+ kobject_del(pdd->kobj_counters);
+ kobject_put(pdd->kobj_counters);
+ pdd->kobj_counters = NULL;
+ }
+
+ kobject_del(p->kobj);
+ kobject_put(p->kobj);
+ p->kobj = NULL;
+}
+
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@@ -973,33 +1049,7 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
- int i;
-
- /* Remove the procfs files */
- if (p->kobj) {
- sysfs_remove_file(p->kobj, &p->attr_pasid);
- kobject_del(p->kobj_queues);
- kobject_put(p->kobj_queues);
- p->kobj_queues = NULL;
-
- for (i = 0; i < p->n_pdds; i++) {
- struct kfd_process_device *pdd = p->pdds[i];
-
- sysfs_remove_file(p->kobj, &pdd->attr_vram);
- sysfs_remove_file(p->kobj, &pdd->attr_sdma);
- sysfs_remove_file(p->kobj, &pdd->attr_evict);
- if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
- sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
- kobject_del(pdd->kobj_stats);
- kobject_put(pdd->kobj_stats);
- pdd->kobj_stats = NULL;
- }
-
- kobject_del(p->kobj);
- kobject_put(p->kobj);
- p->kobj = NULL;
- }
-
+ kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
kfd_process_free_outstanding_kfd_bos(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 95a6c36cea4c..243dd1efcdbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -153,6 +153,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
if (pqn->q && pqn->q->gws)
amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
pqn->q->gws);
+ kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
kfree(pqn);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index dff1011dd7ee..9a71d8919bd6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -119,28 +119,40 @@ static void svm_range_remove_notifier(struct svm_range *prange)
}
static int
-svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
- unsigned long *hmm_pfns, uint64_t npages)
+svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+ unsigned long *hmm_pfns, uint32_t gpuidx)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
- dma_addr_t *addr = *dma_addr;
+ dma_addr_t *addr = prange->dma_addr[gpuidx];
+ struct device *dev = adev->dev;
struct page *page;
int i, r;
if (!addr) {
- addr = kvmalloc_array(npages, sizeof(*addr),
+ addr = kvmalloc_array(prange->npages, sizeof(*addr),
GFP_KERNEL | __GFP_ZERO);
if (!addr)
return -ENOMEM;
- *dma_addr = addr;
+ prange->dma_addr[gpuidx] = addr;
}
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < prange->npages; i++) {
if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
"leaking dma mapping\n"))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
page = hmm_pfn_to_page(hmm_pfns[i]);
+ if (is_zone_device_page(page)) {
+ struct amdgpu_device *bo_adev =
+ amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+
+ addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
+ bo_adev->vm_manager.vram_base_offset -
+ bo_adev->kfd.dev->pgmap.range.start;
+ addr[i] |= SVM_RANGE_VRAM_DOMAIN;
+ pr_debug("vram address detected: 0x%llx\n", addr[i]);
+ continue;
+ }
addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
r = dma_mapping_error(dev, addr[i]);
if (r) {
@@ -175,8 +187,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
}
adev = (struct amdgpu_device *)pdd->dev->kgd;
- r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
- hmm_pfns, prange->npages);
+ r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
if (r)
break;
}
@@ -301,14 +312,6 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
return true;
}
-static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
-{
- if (svm_bo)
- kref_get(&svm_bo->kref);
-
- return svm_bo;
-}
-
static void svm_range_bo_release(struct kref *kref)
{
struct svm_range_bo *svm_bo;
@@ -347,7 +350,7 @@ static void svm_range_bo_release(struct kref *kref)
kfree(svm_bo);
}
-static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+void svm_range_bo_unref(struct svm_range_bo *svm_bo)
{
if (!svm_bo)
return;
@@ -564,6 +567,24 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
return (struct amdgpu_device *)pdd->dev->kgd;
}
+struct kfd_process_device *
+svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)
+{
+ struct kfd_process *p;
+ int32_t gpu_idx, gpuid;
+ int r;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx);
+ if (r) {
+ pr_debug("failed to get device id by adev %p\n", adev);
+ return NULL;
+ }
+
+ return kfd_process_device_from_gpuidx(p, gpu_idx);
+}
+
static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -1002,21 +1023,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
}
static uint64_t
-svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
+ int domain)
{
struct amdgpu_device *bo_adev;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
uint64_t pte_flags;
- bool snoop = !prange->ttm_res;
+ bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
- if (prange->svm_bo && prange->ttm_res)
+ if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
switch (adev->asic_type) {
case CHIP_ARCTURUS:
- if (prange->svm_bo && prange->ttm_res) {
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1032,7 +1054,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
}
break;
case CHIP_ALDEBARAN:
- if (prange->svm_bo && prange->ttm_res) {
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1062,14 +1084,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
pte_flags = AMDGPU_PTE_VALID;
- pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM;
+ pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
prange->svms, prange->start, prange->last,
- prange->ttm_res ? 1:0, pte_flags, mapping_flags);
+ (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
return pte_flags;
}
@@ -1140,31 +1162,41 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va bo_va;
bool table_freed = false;
uint64_t pte_flags;
+ unsigned long last_start;
+ int last_domain;
int r = 0;
+ int64_t i;
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
prange->last);
- if (prange->svm_bo && prange->ttm_res) {
+ if (prange->svm_bo && prange->ttm_res)
bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
- prange->mapping.bo_va = &bo_va;
- }
- prange->mapping.start = prange->start;
- prange->mapping.last = prange->last;
- prange->mapping.offset = prange->ttm_res ? prange->offset : 0;
- pte_flags = svm_range_get_pte_flags(adev, prange);
+ last_start = prange->start;
+ for (i = 0; i < prange->npages; i++) {
+ last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
+ dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
+ if ((prange->start + i) < prange->last &&
+ last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
+ continue;
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
- prange->mapping.start,
- prange->mapping.last, pte_flags,
- prange->mapping.offset,
- prange->ttm_res,
- dma_addr, &vm->last_update,
- &table_freed);
- if (r) {
- pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
- goto out;
+ pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
+ last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+ pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
+ r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
+ last_start,
+ prange->start + i, pte_flags,
+ last_start - prange->start,
+ NULL,
+ dma_addr,
+ &vm->last_update,
+ &table_freed);
+ if (r) {
+ pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+ goto out;
+ }
+ last_start = prange->start + i + 1;
}
r = amdgpu_vm_update_pdes(adev, vm, false);
@@ -1185,7 +1217,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
p->pasid, TLB_FLUSH_LEGACY);
}
out:
- prange->mapping.bo_va = NULL;
return r;
}
@@ -1319,6 +1350,17 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
}
+static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
+{
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+ return SVM_ADEV_PGMAP_OWNER(adev);
+}
+
/*
* Validation+GPU mapping with concurrent invalidation (MMU notifiers)
*
@@ -1349,6 +1391,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
{
struct svm_validate_context ctx;
struct hmm_range *hmm_range;
+ struct kfd_process *p;
+ void *owner;
+ int32_t idx;
int r = 0;
ctx.process = container_of(prange->svms, struct kfd_process, svms);
@@ -1394,33 +1439,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
svm_range_reserve_bos(&ctx);
- if (!prange->actual_loc) {
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
- }
-
- r = svm_range_dma_map(prange, ctx.bitmap,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
+ p = container_of(prange->svms, struct kfd_process, svms);
+ owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap,
+ MAX_GPU_INSTANCE));
+ for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) {
+ if (kfd_svm_page_owner(p, idx) != owner) {
+ owner = NULL;
+ break;
}
+ }
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ prange->start << PAGE_SHIFT,
+ prange->npages, &hmm_range,
+ false, true, owner);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+ }
- prange->validated_once = true;
+ r = svm_range_dma_map(prange, ctx.bitmap,
+ hmm_range->hmm_pfns);
+ if (r) {
+ pr_debug("failed %d to dma map range\n", r);
+ goto unreserve_out;
}
+ prange->validated_once = true;
+
svm_range_lock(prange);
- if (!prange->actual_loc) {
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
- pr_debug("hmm update the range, need validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
+ if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ pr_debug("hmm update the range, need validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
}
if (!list_empty(&prange->child_list)) {
pr_debug("range split by unmap in parallel, validate again\n");
@@ -1572,6 +1622,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
unsigned long start, unsigned long last)
{
struct svm_range_list *svms = prange->svms;
+ struct svm_range *pchild;
struct kfd_process *p;
int r = 0;
@@ -1583,7 +1634,19 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
if (!p->xnack_enabled) {
int evicted_ranges;
- atomic_inc(&prange->invalid);
+ list_for_each_entry(pchild, &prange->child_list, child_list) {
+ mutex_lock_nested(&pchild->lock, 1);
+ if (pchild->start <= last && pchild->last >= start) {
+ pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
+ pchild->start, pchild->last);
+ atomic_inc(&pchild->invalid);
+ }
+ mutex_unlock(&pchild->lock);
+ }
+
+ if (prange->start <= last && prange->last >= start)
+ atomic_inc(&prange->invalid);
+
evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
if (evicted_ranges != 1)
return r;
@@ -1600,7 +1663,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
- struct svm_range *pchild;
unsigned long s, l;
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
@@ -2311,6 +2373,27 @@ static bool svm_range_skip_recover(struct svm_range *prange)
return false;
}
+static void
+svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
+ struct svm_range *prange, int32_t gpuidx)
+{
+ struct kfd_process_device *pdd;
+
+ if (gpuidx == MAX_GPU_INSTANCE)
+ /* fault is on different page of same range
+ * or fault is skipped to recover later
+ */
+ pdd = svm_range_get_pdd_by_adev(prange, adev);
+ else
+ /* fault recovered
+ * or fault cannot recover because GPU no access on the range
+ */
+ pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+
+ if (pdd)
+ WRITE_ONCE(pdd->faults, pdd->faults + 1);
+}
+
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr)
@@ -2320,7 +2403,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
struct svm_range *prange;
struct kfd_process *p;
uint64_t timestamp;
- int32_t best_loc, gpuidx;
+ int32_t best_loc;
+ int32_t gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
int r = 0;
@@ -2440,6 +2524,9 @@ out_unlock_range:
out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
+
+ svm_range_count_fault(adev, p, prange, gpuidx);
+
mmput(mm);
out:
kfd_unref_process(p);
@@ -2662,7 +2749,8 @@ out:
/* FIXME: This is a workaround for page locking bug when some pages are
* invalid during migration to VRAM
*/
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
+ void *owner)
{
struct hmm_range *hmm_range;
int r;
@@ -2673,7 +2761,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm)
r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
prange->start << PAGE_SHIFT,
prange->npages, &hmm_range,
- false, true);
+ false, true, owner);
if (!r) {
amdgpu_hmm_range_get_pages_done(hmm_range);
prange->validated_once = true;
@@ -2718,16 +2806,6 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
best_loc == prange->actual_loc)
return 0;
- /*
- * Prefetch to GPU without host access flag, set actual_loc to gpu, then
- * validate on gpu and map to gpus will be handled afterwards.
- */
- if (best_loc && !prange->actual_loc &&
- !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) {
- prange->actual_loc = best_loc;
- return 0;
- }
-
if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm);
*migrated = !r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 0c0fc399395e..3fc1fd8b4fbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -35,6 +35,10 @@
#include "amdgpu.h"
#include "kfd_priv.h"
+#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
+#define SVM_ADEV_PGMAP_OWNER(adev)\
+ ((adev)->hive ? (void *)(adev)->hive : (void *)(adev))
+
struct svm_range_bo {
struct amdgpu_bo *bo;
struct kref kref;
@@ -110,7 +114,6 @@ struct svm_range {
struct list_head update_list;
struct list_head remove_list;
struct list_head insert_list;
- struct amdgpu_bo_va_mapping mapping;
uint64_t npages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
@@ -147,6 +150,14 @@ static inline void svm_range_unlock(struct svm_range *prange)
mutex_unlock(&prange->lock);
}
+static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+ if (svm_bo)
+ kref_get(&svm_bo->kref);
+
+ return svm_bo;
+}
+
int svm_range_list_init(struct kfd_process *p);
void svm_range_list_fini(struct kfd_process *p);
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
@@ -173,13 +184,17 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_free_dma_mappings(struct svm_range *prange);
-void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
+void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
+ void *owner);
+struct kfd_process_device *
+svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
/* SVM API and HMM page migration work together, device memory type
* is initialized to not 0 when page migration register device memory.
*/
#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
+void svm_range_bo_unref(struct svm_range_bo *svm_bo);
#else
struct kfd_process;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b5b5ccf0ed71..01e1062dc235 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1160,6 +1160,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK)
init_data.flags.disable_fractional_pwm = true;
+ if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING)
+ init_data.flags.edp_no_power_sequencing = true;
+
init_data.flags.power_down_display_on_boot = true;
INIT_LIST_HEAD(&adev->dm.da_list);
@@ -10118,7 +10121,7 @@ static int validate_overlay(struct drm_atomic_state *state)
int i;
struct drm_plane *plane;
struct drm_plane_state *new_plane_state;
- struct drm_plane_state *primary_state, *cursor_state, *overlay_state = NULL;
+ struct drm_plane_state *primary_state, *overlay_state = NULL;
/* Check if primary plane is contained inside overlay */
for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) {
@@ -10148,14 +10151,6 @@ static int validate_overlay(struct drm_atomic_state *state)
if (!primary_state->crtc)
return 0;
- /* check if cursor plane is enabled */
- cursor_state = drm_atomic_get_plane_state(state, overlay_state->crtc->cursor);
- if (IS_ERR(cursor_state))
- return PTR_ERR(cursor_state);
-
- if (drm_atomic_plane_disabling(plane->state, cursor_state))
- return 0;
-
/* Perform the bounds check to ensure the overlay plane covers the primary */
if (primary_state->crtc_x < overlay_state->crtc_x ||
primary_state->crtc_y < overlay_state->crtc_y ||
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 5101a4f8f69f..45640f1c26c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -297,6 +297,7 @@ struct dc_config {
bool allow_seamless_boot_optimization;
bool power_down_display_on_boot;
bool edp_not_connected;
+ bool edp_no_power_sequencing;
bool force_enum_edp;
bool forced_clocks;
bool allow_lttpr_non_transparent_mode;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 2938caaa2299..62d595ded866 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1022,8 +1022,20 @@ void dce110_edp_backlight_control(
/* dc_service_sleep_in_milliseconds(50); */
/*edp 1.2*/
panel_instance = link->panel_cntl->inst;
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON)
- edp_receiver_ready_T7(link);
+
+ if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) {
+ if (!link->dc->config.edp_no_power_sequencing)
+ /*
+ * Sometimes, DP receiver chip power-controlled externally by an
+ * Embedded Controller could be treated and used as eDP,
+ * if it drives mobile display. In this case,
+ * we shouldn't be doing power-sequencing, hence we can skip
+ * waiting for T7-ready.
+ */
+ edp_receiver_ready_T7(link);
+ else
+ DC_LOG_DC("edp_receiver_ready_T7 skipped\n");
+ }
if (ctx->dc->ctx->dmub_srv &&
ctx->dc->debug.dmub_command_table) {
@@ -1048,8 +1060,19 @@ void dce110_edp_backlight_control(
dc_link_backlight_enable_aux(link, enable);
/*edp 1.2*/
- if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF)
- edp_add_delay_for_T9(link);
+ if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) {
+ if (!link->dc->config.edp_no_power_sequencing)
+ /*
+ * Sometimes, DP receiver chip power-controlled externally by an
+ * Embedded Controller could be treated and used as eDP,
+ * if it drives mobile display. In this case,
+ * we shouldn't be doing power-sequencing, hence we can skip
+ * waiting for T9-ready.
+ */
+ edp_add_delay_for_T9(link);
+ else
+ DC_LOG_DC("edp_receiver_ready_T9 skipped\n");
+ }
if (!enable && link->dpcd_sink_ext_caps.bits.oled)
msleep(OLED_PRE_T11_DELAY);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
index 5dcdc5a858fe..4bab97acb155 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile
@@ -28,6 +28,7 @@ endif
CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float
endif
+ifdef CONFIG_X86
ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
@@ -36,6 +37,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4
else
CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2
endif
+endif
AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index d34024fd798a..45862167e6ce 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -50,6 +50,10 @@ dml_ccflags += -msse2
endif
endif
+ifneq ($(CONFIG_FRAME_WARN),0)
+frame_warn_flag := -Wframe-larger-than=2048
+endif
+
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
ifdef CONFIG_DRM_AMD_DC_DCN
@@ -60,9 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) -Wframe-larger-than=2048
+CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 5f245bde54ff..a2a4fbeb83f8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -119,7 +119,7 @@ bool dal_irq_service_set(
dal_irq_service_ack(irq_service, source);
- if (info->funcs->set)
+ if (info->funcs && info->funcs->set)
return info->funcs->set(irq_service, info, enable);
dal_irq_service_set_generic(irq_service, info, enable);
@@ -153,7 +153,7 @@ bool dal_irq_service_ack(
return false;
}
- if (info->funcs->ack)
+ if (info->funcs && info->funcs->ack)
return info->funcs->ack(irq_service, info);
dal_irq_service_ack_generic(irq_service, info);
diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h
index 5f9346622301..1139b9eb9f6f 100644
--- a/drivers/gpu/drm/amd/display/dc/irq_types.h
+++ b/drivers/gpu/drm/amd/display/dc/irq_types.h
@@ -165,7 +165,7 @@ enum irq_type
};
#define DAL_VALID_IRQ_SRC_NUM(src) \
- ((src) <= DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID)
+ ((src) < DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID)
/* Number of Page Flip IRQ Sources. */
#define DAL_PFLIP_IRQ_SRC_NUM \
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index 8c886ece71f6..973de346410d 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -352,3 +352,63 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub)
{
return REG_READ(DMCUB_TIMER_CURRENT);
}
+
+void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data)
+{
+ uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset;
+ uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled;
+
+ if (!dmub || !diag_data)
+ return;
+
+ memset(diag_data, 0, sizeof(*diag_data));
+
+ diag_data->dmcub_version = dmub->fw_version;
+
+ diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0);
+ diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1);
+ diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2);
+ diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3);
+ diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4);
+ diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5);
+ diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6);
+ diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7);
+ diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8);
+ diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9);
+ diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10);
+ diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11);
+ diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12);
+ diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13);
+ diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14);
+ diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15);
+
+ diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR);
+ diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR);
+ diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR);
+
+ diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR);
+ diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR);
+ diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE);
+
+ diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR);
+ diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR);
+ diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE);
+
+ REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled);
+ diag_data->is_dmcub_enabled = is_dmub_enabled;
+
+ REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset);
+ diag_data->is_dmcub_soft_reset = is_soft_reset;
+
+ REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset);
+ diag_data->is_dmcub_secure_reset = is_sec_reset;
+
+ REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled);
+ diag_data->is_traceport_en = is_traceport_enabled;
+
+ REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled);
+ diag_data->is_cw0_enabled = is_cw0_enabled;
+
+ REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled);
+ diag_data->is_cw6_enabled = is_cw6_enabled;
+}
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
index 2829c3e9a310..9456a6a2d518 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h
@@ -36,6 +36,9 @@ struct dmub_srv;
DMUB_SR(DMCUB_CNTL) \
DMUB_SR(DMCUB_CNTL2) \
DMUB_SR(DMCUB_SEC_CNTL) \
+ DMUB_SR(DMCUB_INBOX0_SIZE) \
+ DMUB_SR(DMCUB_INBOX0_RPTR) \
+ DMUB_SR(DMCUB_INBOX0_WPTR) \
DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \
DMUB_SR(DMCUB_INBOX1_SIZE) \
DMUB_SR(DMCUB_INBOX1_RPTR) \
@@ -103,11 +106,15 @@ struct dmub_srv;
DMUB_SR(DMCUB_SCRATCH14) \
DMUB_SR(DMCUB_SCRATCH15) \
DMUB_SR(DMCUB_GPINT_DATAIN1) \
+ DMUB_SR(DMCUB_GPINT_DATAOUT) \
DMUB_SR(CC_DC_PIPE_DIS) \
DMUB_SR(MMHUBBUB_SOFT_RESET) \
DMUB_SR(DCN_VM_FB_LOCATION_BASE) \
DMUB_SR(DCN_VM_FB_OFFSET) \
- DMUB_SR(DMCUB_TIMER_CURRENT)
+ DMUB_SR(DMCUB_TIMER_CURRENT) \
+ DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \
+ DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \
+ DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR)
#define DMUB_DCN31_FIELDS() \
DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \
@@ -115,6 +122,7 @@ struct dmub_srv;
DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \
DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \
DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \
+ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \
DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \
DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \
DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \
@@ -138,11 +146,13 @@ struct dmub_srv;
DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \
DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \
DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \
- DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET)
+ DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \
+ DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR)
struct dmub_srv_dcn31_reg_offset {
#define DMUB_SR(reg) uint32_t reg;
DMUB_DCN31_REGS()
+ DMCUB_INTERNAL_REGS()
#undef DMUB_SR
};
@@ -227,4 +237,6 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset);
uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub);
+void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data);
+
#endif /* _DMUB_DCN31_H_ */
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index fd7e996ab1d7..2bdbd7406f56 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -208,6 +208,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
break;
case DMUB_ASIC_DCN31:
+ dmub->regs_dcn31 = &dmub_srv_dcn31_regs;
funcs->reset = dmub_dcn31_reset;
funcs->reset_release = dmub_dcn31_reset_release;
funcs->backdoor_load = dmub_dcn31_backdoor_load;
@@ -231,9 +232,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic)
funcs->get_outbox0_wptr = dmub_dcn31_get_outbox0_wptr;
funcs->set_outbox0_rptr = dmub_dcn31_set_outbox0_rptr;
- if (asic == DMUB_ASIC_DCN31) {
- dmub->regs_dcn31 = &dmub_srv_dcn31_regs;
- }
+ funcs->get_diagnostic_data = dmub_dcn31_get_diagnostic_data;
funcs->get_current_time = dmub_dcn31_get_current_time;
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 332b0df53e52..ff1d3d4a6488 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -223,10 +223,12 @@ enum amd_harvest_ip_mask {
};
enum DC_FEATURE_MASK {
- DC_FBC_MASK = 0x1,
- DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
- DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4,
- DC_PSR_MASK = 0x8,
+ //Default value can be found at "uint amdgpu_dc_feature_mask"
+ DC_FBC_MASK = (1 << 0), //0x1, disabled by default
+ DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default
+ DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default
+ DC_PSR_MASK = (1 << 3), //0x8, disabled by default
+ DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default
};
enum DC_DEBUG_MASK {
diff --git a/drivers/gpu/drm/amd/include/navi10_enum.h b/drivers/gpu/drm/amd/include/navi10_enum.h
index d5ead9680c6e..84bcb96f76ea 100644
--- a/drivers/gpu/drm/amd/include/navi10_enum.h
+++ b/drivers/gpu/drm/amd/include/navi10_enum.h
@@ -430,7 +430,7 @@ ARRAY_2D_DEPTH = 0x00000001,
*/
typedef enum ENUM_NUM_SIMD_PER_CU {
-NUM_SIMD_PER_CU = 0x00000004,
+NUM_SIMD_PER_CU = 0x00000002,
} ENUM_NUM_SIMD_PER_CU;
/*
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index a276ebad47e6..769f58d5ae1a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2902,14 +2902,15 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
return sprintf(buf, "%i\n", 0);
}
-static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+
+static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev,
+ struct device_attribute *attr,
+ char *buf,
+ enum pp_power_limit_level pp_limit_level)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_MAX;
uint32_t limit;
ssize_t size;
int r;
@@ -2919,17 +2920,17 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
if (adev->in_suspend && !adev->in_runpm)
return -EPERM;
+ if ( !(pp_funcs && pp_funcs->get_power_limit))
+ return -ENODATA;
+
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
+ r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
+ pp_limit_level, power_type);
if (!r)
size = sysfs_emit(buf, "%u\n", limit * 1000000);
@@ -2942,85 +2943,31 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
return size;
}
-static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
+
+static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amdgpu_device *adev = dev_get_drvdata(dev);
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_CURRENT;
- uint32_t limit;
- ssize_t size;
- int r;
-
- if (amdgpu_in_reset(adev))
- return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
- return -EPERM;
-
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
-
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MAX);
- if (!r)
- size = sysfs_emit(buf, "%u\n", limit * 1000000);
- else
- size = sysfs_emit(buf, "\n");
+}
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_CURRENT);
- return size;
}
static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amdgpu_device *adev = dev_get_drvdata(dev);
- const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
- enum pp_power_type power_type = to_sensor_dev_attr(attr)->index;
- enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_DEFAULT;
- uint32_t limit;
- ssize_t size;
- int r;
+ return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_DEFAULT);
- if (amdgpu_in_reset(adev))
- return -EPERM;
- if (adev->in_suspend && !adev->in_runpm)
- return -EPERM;
-
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- if (r < 0) {
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
-
- if (pp_funcs && pp_funcs->get_power_limit)
- r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit,
- pp_limit_level, power_type);
- else
- r = -ENODATA;
-
- if (!r)
- size = sysfs_emit(buf, "%u\n", limit * 1000000);
- else
- size = sysfs_emit(buf, "\n");
-
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
- return size;
}
+
static ssize_t amdgpu_hwmon_show_power_label(struct device *dev,
struct device_attribute *attr,
char *buf)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index cb375f1beebd..ebe672142808 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1453,10 +1453,14 @@ static int smu_hw_fini(void *handle)
if (smu->is_apu) {
smu_powergate_sdma(&adev->smu, true);
- smu_dpm_set_vcn_enable(smu, false);
- smu_dpm_set_jpeg_enable(smu, false);
}
+ smu_dpm_set_vcn_enable(smu, false);
+ smu_dpm_set_jpeg_enable(smu, false);
+
+ adev->vcn.cur_state = AMD_PG_STATE_GATE;
+ adev->jpeg.cur_state = AMD_PG_STATE_GATE;
+
if (!smu->pm_enabled)
return 0;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
index 7664334d8144..18a1ffdca227 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
@@ -189,10 +189,11 @@ err0_out:
static int yellow_carp_system_features_control(struct smu_context *smu, bool en)
{
struct smu_feature *feature = &smu->smu_feature;
+ struct amdgpu_device *adev = smu->adev;
uint32_t feature_mask[2];
int ret = 0;
- if (!en)
+ if (!en && !adev->in_s0ix)
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
bitmap_zero(feature->enabled, feature->feature_num);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 406681317419..573154268d43 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1325,6 +1325,7 @@ radeon_user_framebuffer_create(struct drm_device *dev,
/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
if (obj->import_attach) {
DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8cd135fa6dcd..5c23b77cb81a 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -374,13 +374,13 @@ radeon_pci_shutdown(struct pci_dev *pdev)
if (radeon_device_is_virtual())
radeon_pci_remove(pdev);
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64)
/*
* Some adapters need to be suspended before a
* shutdown occurs in order to prevent an error
- * during kexec.
- * Make this power specific becauase it breaks
- * some non-power boards.
+ * during kexec, shutdown or reboot.
+ * Make this power and Loongson specific because
+ * it breaks some other boards.
*/
radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false);
#endif