158 files changed, 5162 insertions, 3019 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3d2625beacf7..327a0daf4a1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -193,6 +193,7 @@ static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; +extern int amdgpu_reset_method; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -1010,10 +1011,10 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t *buf, size_t size, bool write); -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags); -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1032,8 +1033,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) -#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) #define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) @@ -1041,9 +1042,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) -#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0)) -#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) +#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1080,7 +1081,16 @@ int emu_soc_asic_init(struct amdgpu_device *adev); tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) -#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false)) + +#define WREG32_SMC_P(_Reg, _Val, _Mask) \ + do { \ + u32 tmp = RREG32_SMC(_Reg); \ + tmp &= (_Mask); \ + tmp |= ((_Val) & ~(_Mask)); \ + WREG32_SMC(_Reg, tmp); \ + } while (0) + +#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false)) #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index ad59ac4423b8..1b865fed74ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -31,8 +31,6 @@ #include "amdgpu_xgmi.h" #include <uapi/linux/kfd_ioctl.h> -static const unsigned int compute_vmid_bitmap = 0xFF00; - /* Total memory size in 
system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables */ @@ -113,7 +111,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = compute_vmid_bitmap, + .compute_vmid_bitmap = + ((1 << AMDGPU_NUM_VMID) - 1) - + ((1 << adev->vm_manager.first_kfd_vmid) - 1), .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .gpuvm_size = min(adev->vm_manager.max_pfn @@ -637,10 +637,8 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd.dev) { - if ((1 << vmid) & compute_vmid_bitmap) - return true; - } + if (adev->kfd.dev) + return vmid >= adev->vm_manager.first_kfd_vmid; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index e249b22fef54..1279053324f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -111,6 +111,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; + struct atom_integrated_system_info_v1_12 v12; }; union umc_info { @@ -214,6 +215,15 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (vram_type) *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); break; + case 12: + mem_channel_number = igp_info->v12.umachannelnumber; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v12.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index aeada7c9fbea..a3fa1560de96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1343,27 +1343,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) { struct amdgpu_job *job; - struct drm_sched_job *s_job; + struct drm_sched_job *s_job, *tmp; uint32_t preempt_seq; struct dma_fence *fence, **ptr; struct amdgpu_fence_driver *drv = &ring->fence_drv; struct drm_gpu_scheduler *sched = &ring->sched; + bool preempted = true; if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) return; preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2)); - if (preempt_seq <= atomic_read(&drv->last_seq)) - return; + if (preempt_seq <= atomic_read(&drv->last_seq)) { + preempted = false; + goto no_preempt; + } preempt_seq &= drv->num_fences_mask; ptr = &drv->fences[preempt_seq]; fence = rcu_dereference_protected(*ptr, 1); +no_preempt: spin_lock(&sched->job_list_lock); - list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { + if (dma_fence_is_signaled(&s_job->s_fence->finished)) { + /* remove job from ring_mirror_list */ + list_del_init(&s_job->node); + sched->ops->free_job(s_job); + continue; + } job = to_amdgpu_job(s_job); - if (job->fence == fence) + if (preempted && job->fence == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } @@ -1461,10 +1471,12 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) } if 
(is_support_sw_smu(adev)) { - ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq, true); + ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq); if (ret || val > max_freq || val < min_freq) return -EINVAL; - ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val, true); + ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val); + } else { + return 0; } pm_runtime_mark_last_busy(adev->ddev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a649e40fd96f..aa5b54e5a1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -81,6 +81,7 @@ MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -114,6 +115,7 @@ const char *amdgpu_asic_name[] = { "NAVI14", "NAVI12", "SIENNA_CICHLID", + "NAVY_FLOUNDER", "LAST", }; @@ -301,10 +303,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, } /* - * device register access helper functions. + * MMIO register access helper functions. */ /** - * amdgpu_device_rreg - read a register + * amdgpu_mm_rreg - read a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -312,8 +314,8 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, * * Returns the 32 bit value from the offset specified. */ -uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags) +uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, + uint32_t acc_flags) { uint32_t ret; @@ -322,9 +324,15 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, if ((reg * 4) < adev->rmmio_size) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - else - ret = adev->pcie_rreg(adev, (reg * 4)); - trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } + trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); return ret; } @@ -370,19 +378,24 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) BUG(); } -void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_t reg, - uint32_t v, uint32_t acc_flags) +void static inline amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { - trace_amdgpu_device_wreg(adev->pdev->device, reg, v); + trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); if ((reg * 4) < adev->rmmio_size) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - else - adev->pcie_wreg(adev, (reg * 4), v); + else { + unsigned long flags; + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); + writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + } } /** - * amdgpu_device_wreg - write to a register + * amdgpu_mm_wreg - write to a memory mapped IO register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -391,13 
+404,13 @@ void static inline amdgpu_device_wreg_no_kiq(struct amdgpu_device *adev, uint32_ * * Writes the value specified to the offset specified. */ -void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, + uint32_t acc_flags) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_kiq_wreg(adev, reg, v); - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /* @@ -416,7 +429,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); } - amdgpu_device_wreg_no_kiq(adev, reg, v, acc_flags); + amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /** @@ -1621,6 +1634,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1722,6 +1738,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_device_enable_virtual_display(adev); + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_request_full_gpu(adev, true); + if (r) + return r; + } + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_SI case CHIP_VERDE: @@ -1788,6 +1810,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); @@ -1801,31 +1824,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) amdgpu_amdkfd_device_probe(adev); - if (amdgpu_sriov_vf(adev)) { - /* handle vbios stuff prior full access mode for new handshake */ - if (adev->virt.req_init_data_ver == 1) { - if (!amdgpu_get_bios(adev)) { - DRM_ERROR("failed to get vbios\n"); - return -EINVAL; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; - } - } - } - - /* we need to send REQ_GPU here for legacy handshaker otherwise the vbios - * will not be prepared by host for this VF */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver < 1) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return r; - } - adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -1857,10 +1855,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; - /* skip vbios handling for new handshake */ - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver == 1) - continue; - /* Read BIOS */ if (!amdgpu_get_bios(adev)) return -EINVAL; @@ -1987,12 +1981,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev) && adev->virt.req_init_data_ver > 0) { - r = amdgpu_virt_request_full_gpu(adev, true); - if (r) - return -EAGAIN; - } - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2474,18 +2462,21 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; + /* displays are handled separately */ - if 
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { - /* XXX handle errors */ - r = adev->ip_blocks[i].version->funcs->suspend(adev); - /* XXX handle errors */ - if (r) { - DRM_ERROR("suspend of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.hw = false; + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) + continue; + + /* XXX handle errors */ + r = adev->ip_blocks[i].version->funcs->suspend(adev); + /* XXX handle errors */ + if (r) { + DRM_ERROR("suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2817,6 +2808,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: #endif return amdgpu_dc != 0; #endif @@ -3324,6 +3316,9 @@ fence_driver_init: queue_delayed_work(system_wq, &adev->delayed_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); + if (amdgpu_sriov_vf(adev)) + flush_delayed_work(&adev->delayed_init_work); + r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) { dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -3951,6 +3946,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: break; default: goto disabled; @@ -4256,18 +4252,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_hive_info *hive = NULL; struct amdgpu_device *tmp_adev = NULL; int i, r = 0; - bool in_ras_intr = amdgpu_ras_intr_triggered(); - bool use_baco = - (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? - true : false; + bool need_emergency_restart = false; bool audio_suspended = false; + /** + * Special case: RAS triggered and full reset isn't supported + */ + need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); + /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { - + if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); ksys_sync_helper(); @@ -4275,7 +4272,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } dev_info(adev->dev, "GPU %s begin!\n", - (in_ras_intr && !use_baco) ? "jobs stop":"reset"); + need_emergency_restart ? "jobs stop":"reset"); /* * Here we trylock to avoid chain of resets executing from @@ -4347,7 +4344,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fbdev_set_suspend(tmp_adev, 1); /* disable ras on ALL IPs */ - if (!(in_ras_intr && !use_baco) && + if (!need_emergency_restart && amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); @@ -4359,12 +4356,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? 
&job->base : NULL); - if (in_ras_intr && !use_baco) + if (need_emergency_restart) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr && !use_baco) + if (need_emergency_restart) goto skip_sched_resume; /* @@ -4441,7 +4438,7 @@ skip_hw_reset: skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) + if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 4ef38c2411ae..a50ff2306504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -133,7 +133,7 @@ static int hw_id_map[MAX_HWIP] = { static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - adev->discovery_tmr_size; + uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, adev->discovery_tmr_size, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index d50d597c45ed..8f6183801cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,7 +24,8 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (64 << 10) +#define DISCOVERY_TMR_SIZE (4 << 10) +#define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 6a39996487b9..2082c0acd216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -911,8 +911,7 @@ int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? &clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -929,8 +928,7 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) if (is_support_sw_smu(adev)) { ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, low ? &clk_freq : NULL, - !low ? &clk_freq : NULL, - true); + !low ? 
&clk_freq : NULL); if (ret) return 0; return clk_freq * 100; @@ -1141,6 +1139,26 @@ int amdgpu_dpm_baco_reset(struct amdgpu_device *adev) return 0; } +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset_is_support(smu); + + return false; +} + +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (is_support_sw_smu(adev)) + return smu_mode1_reset(smu); + + return -EOPNOTSUPP; +} + int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 6a8aae70a0e6..7f3cd7185650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -529,6 +529,9 @@ int amdgpu_dpm_mode2_reset(struct amdgpu_device *adev); bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev); +bool amdgpu_dpm_is_mode1_reset_supported(struct amdgpu_device *adev); +int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); + int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2f12f3ae3c7f..2eacf1f51bbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -149,6 +149,7 @@ int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; +int amdgpu_reset_method = -1; /* auto */ struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -757,6 +758,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: reset_method (int) + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) + */ +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +module_param_named(reset_method, amdgpu_reset_method, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8d84975885cd..58d4c219178a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -424,9 +424,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " - "0x%016llx, cpu addr 0x%p\n", ring->name, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr 0x%016llx\n", + ring->name, ring->fence_drv.gpu_addr); return 0; } @@ -782,8 +781,10 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) if (amdgpu_sriov_vf(adev)) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, + ARRAY_SIZE(amdgpu_debugfs_fence_list_sriov)); + return amdgpu_debugfs_add_files(adev, 
amdgpu_debugfs_fence_list, + ARRAY_SIZE(amdgpu_debugfs_fence_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index de9784b0c19b..7f9e50247413 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -930,7 +930,8 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = { int amdgpu_debugfs_gem_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, + ARRAY_SIZE(amdgpu_debugfs_gem_list)); #endif return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 2bd9423c1dab..acdb61cfa24c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -83,6 +83,15 @@ struct amdgpu_vmhub { uint32_t vm_context0_cntl; uint32_t vm_l2_pro_fault_status; uint32_t vm_l2_pro_fault_cntl; + + /* + * store the register distances between two continuous context domain + * and invalidation engine. + */ + uint32_t ctx_distance; + uint32_t ctx_addr_distance; /* include LO32/HI32 */ + uint32_t eng_distance; + uint32_t eng_addr_distance; /* include LO32/HI32 */ }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4ffc32b78745..dcd492170598 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -468,7 +468,8 @@ static const struct drm_info_list amdgpu_debugfs_sa_list[] = { int amdgpu_debugfs_sa_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, + ARRAY_SIZE(amdgpu_debugfs_sa_list)); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 267fa45ddb66..7521f4ab55de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -574,6 +574,9 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) INIT_LIST_HEAD(&id_mgr->ids_lru); atomic_set(&id_mgr->reserved_vmid_num, 0); + /* manage only VMIDs not used by KFD */ + id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; + /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { amdgpu_vmid_reset(adev, i, j); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2975c4a6e581..937029ad5271 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,7 +37,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) memset(&ti, 0, sizeof(struct amdgpu_task_info)); - if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { + if (amdgpu_gpu_recovery && + amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { DRM_ERROR("ring %s timeout, but soft recovered\n", s_job->sched->name); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 347b06d3c140..20f39aa04fb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1105,7 +1105,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, 
SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -1173,7 +1173,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -1241,7 +1241,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); else @@ -1311,7 +1311,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); else @@ -1381,7 +1381,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); else @@ -1451,7 +1451,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, } if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask, true); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); else @@ -2960,7 +2960,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", sclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev, @@ -2997,7 +2997,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, if (r) return r; - return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000); + return snprintf(buf, PAGE_SIZE, "%u\n", mclk * 10 * 1000); } static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, @@ -3558,21 +3558,34 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); - if (ret) - DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", - enable ? "enable" : "disable", ret); - - /* enable/disable Low Memory PState for UVD (4k videos) */ - if (adev->asic_type == CHIP_STONEY && - adev->uvd.decode_image_width >= WIDTH_4K) { - struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.uvd_active = true; + adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; + } else { + adev->pm.dpm.uvd_active = false; + } + mutex_unlock(&adev->pm.mutex); - if (hwmgr && hwmgr->hwmgr_func && - hwmgr->hwmgr_func->update_nbdpm_pstate) - hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, - !enable, - true); + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? 
"enable" : "disable", ret); + + /* enable/disable Low Memory PState for UVD (4k videos) */ + if (adev->asic_type == CHIP_STONEY && + adev->uvd.decode_image_width >= WIDTH_4K) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (hwmgr && hwmgr->hwmgr_func && + hwmgr->hwmgr_func->update_nbdpm_pstate) + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, + !enable, + true); + } } } @@ -3580,10 +3593,24 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); - if (ret) - DRM_ERROR("Dpm %s vce failed, ret = %d. \n", - enable ? "enable" : "disable", ret); + if (adev->family == AMDGPU_FAMILY_SI) { + mutex_lock(&adev->pm.mutex); + if (enable) { + adev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; + } else { + adev->pm.dpm.vce_active = false; + } + mutex_unlock(&adev->pm.mutex); + + amdgpu_pm_compute_clocks(adev); + } else { + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + if (ret) + DRM_ERROR("Dpm %s vce failed, ret = %d. \n", + enable ? "enable" : "disable", ret); + } } void amdgpu_pm_print_power_states(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9342a9e8cadf..aa80cf799e42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -99,6 +99,7 @@ static int psp_early_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -430,6 +431,52 @@ static int psp_tmr_load(struct psp_context *psp) return ret; } +static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd) +{ + if (amdgpu_sriov_vf(psp->adev)) + cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; + else + cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR; +} + +static int psp_tmr_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_tmr_unload_cmd_buf(psp, cmd); + DRM_INFO("free PSP TMR buffer\n"); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_tmr_terminate(struct psp_context *psp) +{ + int ret; + void *tmr_buf; + void **pptr; + + ret = psp_tmr_unload(psp); + if (ret) + return ret; + + /* free TMR memory buffer */ + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + + return 0; +} + static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t asd_mc, uint32_t size) { @@ -452,7 +499,9 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. 
* TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || (psp->adev->asic_type == CHIP_SIENNA_CICHLID)) + if (amdgpu_sriov_vf(psp->adev) || + (psp->adev->asic_type == CHIP_SIENNA_CICHLID) || + (psp->adev->asic_type == CHIP_NAVY_FLOUNDER)) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -1717,7 +1766,8 @@ static int psp_np_fw_load(struct psp_context *psp) continue; if (psp->autoload_supported && - adev->asic_type == CHIP_SIENNA_CICHLID && + (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) @@ -1866,8 +1916,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - void *tmr_buf; - void **pptr; int ret; if (psp->adev->psp.ta_fw) { @@ -1883,10 +1931,9 @@ static int psp_hw_fini(void *handle) return ret; } + psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1933,6 +1980,18 @@ static int psp_suspend(void *handle) } } + ret = psp_asd_unload(psp); + if (ret) { + DRM_ERROR("Failed to unload asd\n"); + return ret; + } + + ret = psp_tmr_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate tmr\n"); + return ret; + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -2222,6 +2281,107 @@ out: return err; } +int parse_ta_bin_descriptor(struct psp_context *psp, + const struct ta_fw_bin_desc *desc, + const struct ta_firmware_header_v2_0 *ta_hdr) +{ + uint8_t *ucode_start_addr = NULL; + + if (!psp || !desc || !ta_hdr) + return -EINVAL; + + ucode_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(desc->offset_bytes) + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_ASD: + psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_feature_version = le32_to_cpu(desc->fw_version); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_XGMI: + psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_xgmi_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_xgmi_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_RAS: + psp->ta_ras_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_ras_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_ras_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_HDCP: + psp->ta_hdcp_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_hdcp_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_hdcp_start_addr = ucode_start_addr; + break; + case TA_FW_TYPE_PSP_DTM: + psp->ta_dtm_ucode_version = le32_to_cpu(desc->fw_version); + psp->ta_dtm_ucode_size = le32_to_cpu(desc->size_bytes); + psp->ta_dtm_start_addr = ucode_start_addr; + break; + default: + dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type); + break; + } + + return 0; +} + +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name) +{ + struct amdgpu_device *adev = psp->adev; + char fw_name[30]; + const struct ta_firmware_header_v2_0 *ta_hdr; + int err = 0; + int ta_index = 0; + + if (!chip_name) { + 
dev_err(adev->dev, "invalid chip name for ta microcode\n"); + return -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out; + + ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data; + + if (le16_to_cpu(ta_hdr->header.header_version_major) != 2) { + dev_err(adev->dev, "unsupported TA header version\n"); + err = -EINVAL; + goto out; + } + + if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_TA_PACKAGING) { + dev_err(adev->dev, "packed TA count exceeds maximum limit\n"); + err = -EINVAL; + goto out; + } + + for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) { + err = parse_ta_bin_descriptor(psp, + &ta_hdr->ta_fw_bin[ta_index], + ta_hdr); + if (err) + goto out; + } + + return 0; +out: + dev_err(adev->dev, "fail to initialize ta microcode\n"); + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + return err; +} + static int psp_set_clockgating_state(void *handle, enum amd_clockgating_state state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 1513887e7a68..623888bf30cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -371,4 +371,6 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name); int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name); +int psp_init_ta_microcode(struct psp_context *psp, + const char *chip_name); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index ab8e7c91c645..e10f02ed3f65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2131,3 +2131,14 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) amdgpu_ras_reset_gpu(adev); } } + +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_VEGA20 && + adev->pm.fw_version <= 0x283400) { + return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) && + amdgpu_ras_intr_triggered(); + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e7df5d8429f8..b2667342cf67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -633,4 +633,5 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev); void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready); +bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 5da20fc166d9..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -35,7 +35,7 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) -TRACE_EVENT(amdgpu_device_rreg, +TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_device_rreg, (unsigned long)__entry->value) ); -TRACE_EVENT(amdgpu_device_wreg, +TRACE_EVENT(amdgpu_mm_wreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 62fa1e691800..fcff5671f6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1869,7 +1869,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) adev->discovery_tmr_size = amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); if (!adev->discovery_tmr_size) - adev->discovery_tmr_size = DISCOVERY_TMR_SIZE; + adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 744404a05fee..183743c5fb7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -390,11 +390,11 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - default: DRM_ERROR("Unknown firmware load type\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index df402c7b3233..12a8bc8fca0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -124,6 +124,29 @@ struct ta_firmware_header_v1_0 { uint32_t ta_dtm_size_bytes; }; +enum ta_fw_type { + TA_FW_TYPE_UNKOWN, + TA_FW_TYPE_PSP_ASD, + TA_FW_TYPE_PSP_XGMI, + TA_FW_TYPE_PSP_RAS, + TA_FW_TYPE_PSP_HDCP, + TA_FW_TYPE_PSP_DTM, +}; + +struct ta_fw_bin_desc { + uint32_t fw_type; + uint32_t fw_version; + uint32_t offset_bytes; + uint32_t size_bytes; +}; + +/* version_major=2, version_minor=0 */ +struct ta_firmware_header_v2_0 { + struct common_firmware_header header; + uint32_t ta_fw_bin_count; + struct ta_fw_bin_desc ta_fw_bin[]; +}; + /* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; @@ -276,6 +299,7 @@ union amdgpu_firmware_header { struct psp_firmware_header_v1_1 psp_v1_1; struct psp_firmware_header_v1_3 psp_v1_3; struct ta_firmware_header_v1_0 ta; + struct ta_firmware_header_v2_0 ta_v2_0; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; @@ -288,6 +312,8 @@ union amdgpu_firmware_header { uint8_t raw[0x100]; }; +#define UCODE_MAX_TA_PACKAGING ((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct ta_fw_bin_desc)) + /* * fw loading support */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 15ff30c53e24..a777d585db84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -43,6 +43,7 @@ #define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" #define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" #define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin" +#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -53,6 +54,7 @@ MODULE_FIRMWARE(FIRMWARE_NAVI10); MODULE_FIRMWARE(FIRMWARE_NAVI14); MODULE_FIRMWARE(FIRMWARE_NAVI12); MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID); +MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -115,6 +117,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) 
adev->vcn.indirect_sram = true; break; + case CHIP_NAVY_FLOUNDER: + fw_name = FIRMWARE_NAVY_FLOUNDER; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -421,6 +429,10 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + /* VCN in SRIOV does not support direct register read/write */ + if (amdgpu_sriov_vf(adev)) + return 0; + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index da233a9e429d..1203c20491e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -27,6 +27,9 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "vi.h" +#include "soc15.h" +#include "nv.h" bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { @@ -513,6 +516,31 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + + /* we have the ability to check now */ + if (amdgpu_sriov_vf(adev)) { + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_FIJI: + vi_set_virt_ops(adev); + break; + case CHIP_VEGA10: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + soc15_set_virt_ops(adev); + break; + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_SIENNA_CICHLID: + nv_set_virt_ops(adev); + /* try send GPU_INIT_DATA request to host */ + amdgpu_virt_request_init_data(adev); + break; + default: /* other chip doesn't support SRIOV */ + DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type); + break; + } + } } static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c8e68d7890bf..770025a5e500 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,7 @@ struct amdgpu_vm { struct amdgpu_vm_manager { /* Handling of VMIDs */ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; + unsigned int first_kfd_vmid; /* Handling of VM fences */ u64 fence_context; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 0219bd6ce1b2..939eca63b094 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -73,6 +73,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_v2_1_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fe306d0f73f7..c2c67ab68a43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1326,6 +1326,14 @@ cik_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 5f3f6ebfb387..55982c0064b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -54,8 +54,6 @@ #define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 #define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define AMDGPU_NUM_OF_VMIDS 8 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 323285eb1457..61e89247faf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -145,6 +145,13 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3114,6 +3121,48 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = /* Pending on emulation bring up */ }; +static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmSPI_START_PHASE, 0x000000ff, 0x00000004), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xffffffff, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3302,6 +3351,12 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_sienna_cichlid, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid)); break; + case CHIP_NAVY_FLOUNDER: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2)); + break; + default: break; } @@ -3483,6 +3538,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.cp_fw_write_wait = true; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3578,6 +3634,9 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -4108,6 +4167,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4230,6 +4290,7 @@ static int gfx_v10_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4484,7 +4545,8 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade /* for ASICs that integrates GFX v10.3 * pa_sc_tile_steering_override should be set to 0 */ - if (adev->asic_type == CHIP_SIENNA_CICHLID) 
+ if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) return 0; /* init num_sc */ @@ -4512,8 +4574,6 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) { @@ -4529,7 +4589,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -4540,7 +4600,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA acccess. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); @@ -4712,12 +4772,19 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, - adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, - adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); - + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } else { + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + } return 0; } @@ -5325,7 +5392,12 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); - WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + + if (adev->asic_type == CHIP_NAVI12) { + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); + } else { + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + } for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) @@ -5710,6 +5782,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -5842,6 +5915,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) if (enable) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -5851,6 +5925,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -5944,6 +6019,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) /* tell RLC which is KIQ queue */ switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -6647,6 +6723,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) * has been remapped to mmVGT_ESGS_RING_SIZE */ switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -6685,6 +6762,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -6975,6 +7053,7 @@ static int gfx_v10_0_soft_reset(void *handle) tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7073,6 +7152,7 @@ static int gfx_v10_0_early_init(void *handle) adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7125,6 +7205,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7156,6 +7237,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) data = RLC_SAFE_MODE__CMD_MASK; switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7468,6 +7550,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: case 
CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -7483,12 +7566,12 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) int data; /* AMD_CG_SUPPORT_GFX_MGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) *flags |= AMD_CG_SUPPORT_GFX_MGCG; /* AMD_CG_SUPPORT_GFX_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CGCG; @@ -7497,17 +7580,17 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_GFX_CGLS; /* AMD_CG_SUPPORT_GFX_RLC_LS */ - data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_CP_LS */ - data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; @@ -7685,14 +7768,9 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; - /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ - if (adev->pdev->device == 0x50) - int_sel = false; - /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | @@ -7843,12 +7921,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *kiq_ring = &kiq->ring; + unsigned long flags; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; + } /* assert preemption condition */ amdgpu_ring_set_preempt_cond_exec(ring, false); @@ -7859,6 +7942,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) ++ring->trail_seq); amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + /* poll the trailing fence */ for (i = 0; i < adev->usec_timeout; i++) { if (ring->trail_seq == @@ -8567,6 +8652,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 4aec76049a60..04eaf3a8fddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1850,8 +1850,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1869,7 +1867,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -1882,7 +1880,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA access. These should be enabled by FW for target VMIDs.
*/ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index efb759b62d21..33f1c4a46ebe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3686,8 +3686,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * */ #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3710,7 +3708,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__PRIVATE_ATC_MASK; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -3723,7 +3721,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA access. These should be enabled by FW for target VMIDs. */ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); WREG32(amdgpu_gds_reg_offset[i].gws, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 99ffc3e1fddc..cb9d60a4e05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2463,8 +2463,6 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) } #define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -2484,7 +2482,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); @@ -2495,7 +2493,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) /* Initialize all compute VMIDs to have no GDS, GWS, or OA access. These should be enabled by FW for target VMIDs.
*/ - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 6682b843bafe..529e46386a50 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -38,15 +38,15 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -207,6 +207,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -245,25 +246,31 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -299,12 +306,14 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 
tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -360,7 +369,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, CRASH_ON_NO_RETRY_FAULT, 1); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, CRASH_ON_RETRY_FAULT, 1); - } + } WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } @@ -386,4 +395,11 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 6939edfc5232..394e6f56948a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -49,15 +49,15 @@ u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -218,6 +218,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; int i; uint32_t tmp; @@ -247,25 +248,31 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + 
lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -287,12 +294,14 @@ int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); @@ -373,4 +382,12 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index fcc4c1912513..fa0bca3e1f73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -49,15 +49,15 @@ u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmGCVM_CONTEXT0_* to mmGCVM_CONTEXT1_* */ - int offset = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void gfxhub_v2_1_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -207,6 +207,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; int i; uint32_t tmp; @@ -236,25 +237,31 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(GC, 0, 
mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; unsigned i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -288,12 +295,14 @@ int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); @@ -372,6 +381,14 @@ void gfxhub_v2_1_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmGCVM_CONTEXT1_CNTL - mmGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmGCVM_INVALIDATE_ENG1_REQ - + mmGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f7e66bf0f647..ec90c62078d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -49,8 +49,6 @@ #include "mmhub_v2_0.h" #include "athub_v2_0.h" #include "athub_v2_1.h" -/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 #if 0 static const struct soc15_reg_golden golden_settings_navi10_hdp[] = @@ -88,7 +86,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -97,7 +95,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -107,7 +105,7 @@ 
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* MM HUB */ hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); @@ -116,7 +114,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, /* GFX HUB */ hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { - reg = hub->vm_context0_cntl + i; + reg = hub->vm_context0_cntl + hub->ctx_distance * i; tmp = RREG32(reg); tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); @@ -285,7 +283,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acquire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -295,18 +294,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); tmp &= 1 << vmid; if (tmp) break; @@ -320,7 +320,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -360,8 +361,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + eng; - u32 ack = hub->vm_inv_eng0_ack + eng; + u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); @@ -504,16 +505,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acquire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1
<< vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ @@ -522,7 +528,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -686,7 +693,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) base = gfxhub_v2_1_get_fb_location(adev); else base = gfxhub_v2_0_get_fb_location(adev); @@ -698,7 +706,8 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev); else adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); @@ -746,6 +755,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -814,7 +824,8 @@ static int gmc_v10_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_init(adev); else gfxhub_v2_0_init(adev); @@ -840,6 +851,7 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -905,8 +917,7 @@ static int gmc_v10_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); @@ -945,6 +956,7 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -971,7 +983,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) r = gfxhub_v2_1_gart_enable(adev); else r = gfxhub_v2_0_gart_enable(adev); @@ -995,7 +1008,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_set_fault_enable_default(adev, value); else gfxhub_v2_0_set_fault_enable_default(adev, value); @@ -1036,7 +1050,8 @@ static int gmc_v10_0_hw_init(void *handle) */ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) gfxhub_v2_1_gart_disable(adev); else gfxhub_v2_0_gart_disable(adev); @@ -1110,7 +1125,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1122,7 +1138,8 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) mmhub_v2_0_get_clockgating(adev, flags); - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index a75e472b4a81..538e7ee35cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -878,7 +878,7 @@ static int gmc_v6_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index bcd4baecfe11..e18296dc1386 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1052,7 +1052,7 @@ static int gmc_v7_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 26976e50e2a2..a9e722b8a458 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1177,7 +1177,7 @@ static int gmc_v8_0_sw_init(void *handle) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = 8; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 11e93a82131d..6e4f3ff4810f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -68,9 +68,6 @@ #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L -/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ -#define AMDGPU_NUM_OF_VMIDS 8 - static const u32 golden_settings_vega10_hdp[] = { 0xf64, 0x0fffffff, 0x00000000, @@ -505,11 +502,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if 
(adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; + uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + 1 << vmid); return; } @@ -526,7 +523,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -537,7 +535,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to @@ -545,10 +544,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if (vmhub == AMDGPU_GFXHUB_0) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + RREG32_NO_KIQ(hub->vm_inv_eng0_req + + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -564,7 +565,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); spin_unlock(&adev->gmc.invalidate_lock); @@ -679,16 +681,21 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, if (use_semaphore) /* a read return value of 1 means semaphore acquire */ amdgpu_ring_emit_reg_wait(ring, - hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0x1, 0x1); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), lower_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + + hub->eng_distance * eng, + hub->vm_inv_eng0_ack + + hub->eng_distance * eng, req, 1 << vmid); /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well.
*/ @@ -697,7 +704,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, * add semaphore release after invalidation, * write with 0 means semaphore release */ - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + + hub->eng_distance * eng, 0); return pd_addr; } @@ -1248,12 +1256,15 @@ static int gmc_v9_0_sw_init(void *handle) /* * number of VMs * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 + * amdgpu graphics/compute will use VMIDs 1..n-1 + * amdkfd will use VMIDs n..15 + * + * The first KFD VMID is 8 for GPUs with graphics, 3 for + * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs + * for video processing. */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.first_kfd_vmid = + adev->asic_type == CHIP_ARCTURUS ? 3 : 8; amdgpu_vm_manager_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 4c6c7544b400..bc300283b6ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -377,7 +377,7 @@ static void jpeg_v1_0_decode_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v1_0_decode_ring_emit_reg_wait(ring, data0, data1, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index c5f49129a300..94caf5204c8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -629,7 +629,7 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; jpeg_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 405767208a4d..dffcb93ecee5 100755 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,15 +54,15 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ - int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -230,6 +230,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device 
*adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -268,25 +269,31 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -333,12 +340,14 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); @@ -429,6 +438,12 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmVM_CONTEXT1_CNTL - mmVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmVM_INVALIDATE_ENG1_REQ - mmVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index af0866af63a5..757fa8e83f5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -39,15 +39,15 @@ void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - /* two registers distance between mmMMVM_CONTEXT0_* to mmMMVM_CONTEXT1_* */ - int offset = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - offset * vmid, lower_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - offset * vmid, upper_32_bits(page_table_base)); + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); } static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) @@ -209,6 +209,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; int i; uint32_t tmp; @@ -239,25 +240,31 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); } } static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - 2 * i, 0xffffffff); + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - 2 * i, 0x1f); + i * hub->eng_addr_distance, 0x1f); } } @@ -279,12 +286,14 @@ int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i; /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); @@ -365,6 +374,13 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ - + mmMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } static void mmhub_v2_0_update_medium_grain_clock_gating(struct 
amdgpu_device *adev, @@ -374,6 +390,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; @@ -406,6 +423,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: if (def != data) WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); if (def1 != data1) @@ -427,6 +445,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); break; default: @@ -442,6 +461,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade if (def != data) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); break; default: @@ -462,6 +482,7 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -483,6 +504,7 @@ void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c0e3efcb09bf..9979f54fef57 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,20 +57,16 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to - * mmVML2VC0_VM_CONTEXT1_* - */ - int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, lower_32_bits(value)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + hub->ctx_addr_distance * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, upper_32_bits(value)); } @@ -301,6 +297,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; uint32_t tmp; int i; @@ -335,21 +332,25 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, - tmp); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * 
hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, lower_32_bits(adev->vm_manager.max_pfn - 1)); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->ctx_addr_distance, upper_32_bits(adev->vm_manager.max_pfn - 1)); } } @@ -357,16 +358,19 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; unsigned i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0xffffffff); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + + i * hub->eng_addr_distance, 0x1f); } } @@ -395,6 +399,7 @@ int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; u32 tmp; u32 i, j; @@ -404,7 +409,7 @@ void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, j * MMHUB_INSTANCE_REGISTER_OFFSET + - i, 0); + i * hub->ctx_distance, 0); /* Setup TLB control */ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, @@ -534,6 +539,15 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + + hub[i]->ctx_distance = mmVML2VC0_VM_CONTEXT1_CNTL - + mmVML2VC0_VM_CONTEXT0_CNTL; + hub[i]->ctx_addr_distance = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub[i]->eng_distance = mmVML2VC0_VM_INVALIDATE_ENG1_REQ - + mmVML2VC0_VM_INVALIDATE_ENG0_REQ; + hub[i]->eng_addr_distance = mmVML2VC0_VM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h new file mode 100644 index 000000000000..3e4e858a6965 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h @@ -0,0 +1,130 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V3_0_H__ +#define __MMSCH_V3_0_H__ + +#include "amdgpu_vcn.h" + +#define MMSCH_VERSION_MAJOR 3 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +enum mmsch_v3_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v3_0_table_info { + uint32_t init_status; + uint32_t table_offset; + uint32_t table_size; +}; + +struct mmsch_v3_0_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; +}; + +struct mmsch_v3_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v3_0_cmd_direct_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v3_0_cmd_direct_read_modify_write { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v3_0_cmd_direct_polling { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v3_0_cmd_end { + struct mmsch_v3_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v3_0_cmd_indirect_write { + struct mmsch_v3_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#define MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_read_modify_write); \ + size_dw = size / 4; \ + direct_rd_mod_wt.cmd_header.reg_offset = reg; \ + direct_rd_mod_wt.mask_value = mask; \ + direct_rd_mod_wt.write_data = data; \ + memcpy((void *)table_loc, &direct_rd_mod_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_WT(reg, value) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_write); \ + size_dw = size / 4; \ + direct_wt.cmd_header.reg_offset = reg; \ + direct_wt.reg_value = value; \ + memcpy((void *)table_loc, &direct_wt, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_DIRECT_POLL(reg, mask, wait) { \ + size = sizeof(struct mmsch_v3_0_cmd_direct_polling); \ + size_dw = size / 4; \ + 
direct_poll.cmd_header.reg_offset = reg; \ + direct_poll.mask_value = mask; \ + direct_poll.wait_value = wait; \ + memcpy((void *)table_loc, &direct_poll, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#define MMSCH_V3_0_INSERT_END() { \ + size = sizeof(struct mmsch_v3_0_cmd_end); \ + size_dw = size / 4; \ + memcpy((void *)table_loc, &end, size); \ + table_loc += size_dw; \ + table_size += size_dw; \ +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 471dc82fd1aa..fdabaf0db3e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -270,6 +270,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) if (ih->use_bus_addr) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid); ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a7cfe3ac7cb6..479991b71295 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -265,17 +265,21 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev) amdgpu_atombios_scratch_regs_engine_hung(adev, true); - dev_info(adev->dev, "GPU mode1 reset\n"); - /* disable BM */ pci_clear_master(adev->pdev); pci_save_state(adev->pdev); - ret = psp_gpu_reset(adev); + if (amdgpu_dpm_is_mode1_reset_supported(adev)) { + dev_info(adev->dev, "GPU smu mode1 reset\n"); + ret = amdgpu_dpm_mode1_reset(adev); + } else { + dev_info(adev->dev, "GPU psp mode1 reset\n"); + ret = psp_gpu_reset(adev); + } + if (ret) dev_err(adev->dev, "GPU mode1 reset failed\n"); - pci_restore_state(adev->pdev); /* wait for asic to come out of reset */ @@ -307,7 +311,15 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; - if (!amdgpu_sriov_vf(adev) && smu_baco_is_support(smu)) + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + + if (smu_baco_is_support(smu)) return AMD_RESET_METHOD_BACO; else return AMD_RESET_METHOD_MODE1; @@ -319,15 +331,16 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + dev_info(adev->dev, "GPU BACO reset\n"); + ret = smu_baco_enter(smu); if (ret) return ret; ret = smu_baco_exit(smu); if (ret) return ret; - } else { + } else ret = nv_asic_mode1_reset(adev); - } return ret; } @@ -411,6 +424,7 @@ legacy_init: navi12_reg_base_init(adev); break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: sienna_cichlid_reg_base_init(adev); break; default: @@ -420,6 +434,11 @@ legacy_init: return 0; } +void nv_set_virt_ops(struct amdgpu_device *adev) +{ + adev->virt.ops = &xgpu_nv_virt_ops; +} + int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -427,12 +446,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - if (amdgpu_sriov_vf(adev)) { - adev->virt.ops = &xgpu_nv_virt_ops; - /* try send GPU_INIT_DATA request to host */ - amdgpu_virt_request_init_data(adev); - } - /* Set IP register base before any HW register access */ r = nv_reg_base_init(adev); if (r) @@ 
-504,10 +517,35 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); - amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; + case CHIP_NAVY_FLOUNDER: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + break; default: return -EINVAL; } @@ -708,8 +746,7 @@ static int nv_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | - AMD_PG_SUPPORT_ATHUB | - AMD_PG_SUPPORT_MMHUB; + AMD_PG_SUPPORT_ATHUB; /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, * as a consequence, the rev_id and external_rev_id are wrong. * workaround it by hardcoding rev_id to 0 (default value). 
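A brief, hedged aside on the nv.c hunks above: SR-IOV virt-ops installation moves out of nv_set_ip_blocks() into the new nv_set_virt_ops() hook (soc15 and vi gain matching hooks later in this diff), so device init can install the virt ops before any IP-block setup runs. The amdgpu_device.c caller is not part of this diff, so the sketch below is an assumption about its shape, using only names the diff introduces:

	/* Hypothetical early-init path (function name illustrative only):
	 * install xgpu_nv_virt_ops first, then wire up the IP blocks.
	 */
	static int example_nv_early_init(struct amdgpu_device *adev)
	{
		if (amdgpu_sriov_vf(adev))
			nv_set_virt_ops(adev);

		return nv_set_ip_blocks(adev);
	}

The reason for the split is spelled out in soc15_set_virt_ops() further down: an SR-IOV guest must have adev->virt.ops (and, on SOC15, the register bases) ready early enough to request full GPU access from the host before set_ip_blocks() touches the hardware.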
@@ -732,9 +769,34 @@ static int nv_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | - AMD_PG_SUPPORT_ATHUB; + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + if (amdgpu_sriov_vf(adev)) { + /* hypervisor control CG and PG enablement */ + adev->cg_flags = 0; + adev->pg_flags = 0; + } adev->external_rev_id = adev->rev_id + 0x28; break; + case CHIP_NAVY_FLOUNDER: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + adev->external_rev_id = adev->rev_id + 0x32; + break; + default: /* FIXME: not supported yet */ return -EINVAL; @@ -961,6 +1023,7 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index b6a95f0122fb..aeef50a6a54b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -28,6 +28,7 @@ void nv_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void nv_set_virt_ops(struct amdgpu_device *adev); int nv_set_ip_blocks(struct amdgpu_device *adev); int navi10_reg_base_init(struct amdgpu_device *adev); int navi14_reg_base_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 423386272920..77f99811cd85 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -57,6 +57,8 @@ MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_asd.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_asd.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -100,6 +102,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -108,7 +113,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) if (err) return err; - if (adev->asic_type != CHIP_SIENNA_CICHLID) { + if (adev->asic_type != CHIP_SIENNA_CICHLID && + adev->asic_type != CHIP_NAVY_FLOUNDER) { err = psp_init_asd_microcode(psp, chip_name); if (err) return err; @@ -173,6 +179,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) } break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: BUG(); @@ -397,7 +404,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if ((!amdgpu_sriov_vf(adev)) && - (adev->asic_type != CHIP_SIENNA_CICHLID)) + (adev->asic_type != CHIP_SIENNA_CICHLID) && + (adev->asic_type != CHIP_NAVY_FLOUNDER)) psp_v11_0_reroute_ih(psp); ring = &psp->km_ring; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1baeddf2f1e6..e2232dd12d8e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -315,30 +315,20 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - wptr = &local_wptr; - lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } - return *wptr; + return wptr >> 2; } /** @@ -485,7 +475,6 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | @@ -508,8 +497,7 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } - /* Interrupt not work fine on GFX10.1 model yet. 
Use fallback instead */ - if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + if (flags & AMDGPU_FENCE_FLAG_INT) { /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); @@ -887,10 +875,6 @@ static int sdma_v5_0_start(struct amdgpu_device *adev) r = sdma_v5_0_load_microcode(adev); if (r) return r; - - /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ - if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d) - msleep(1000); } /* unhalt the MEs */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 318d32e2bbf6..46a9617fee5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -45,6 +45,7 @@ #include "sdma_v5_2.h" MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -85,6 +86,7 @@ static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: break; default: break; @@ -152,6 +154,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: BUG(); } @@ -167,7 +172,8 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) goto out; for (i = 1; i < adev->sdma.num_instances; i++) { - if (adev->asic_type == CHIP_SIENNA_CICHLID) { + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) { memcpy((void*)&adev->sdma.instance[i], (void*)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance)); @@ -262,30 +268,20 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - wptr = &local_wptr; - lowbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)); + wptr = wptr << 32; + wptr |= RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", ring->me, wptr); } - return *wptr; + return wptr >> 2; } /** @@ -417,7 +413,6 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { - struct amdgpu_device *adev = ring->adev; bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; /* write the fence */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | @@ -440,8 
+435,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, upper_32_bits(seq)); } - /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ - if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + if (flags & AMDGPU_FENCE_FLAG_INT) { /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); @@ -1167,7 +1161,16 @@ static int sdma_v5_2_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->sdma.num_instances = 4; + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + adev->sdma.num_instances = 4; + break; + case CHIP_NAVY_FLOUNDER: + adev->sdma.num_instances = 2; + break; + default: + break; + } sdma_v5_2_set_ring_funcs(adev); sdma_v5_2_set_buffer_funcs(adev); @@ -1177,6 +1180,40 @@ static int sdma_v5_2_early_init(void *handle) return 0; } +static unsigned sdma_v5_2_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid; + default: + break; + } + return -EINVAL; +} + +static unsigned sdma_v5_2_seq_to_trap_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SDMA0_5_0__SRCID__SDMA_TRAP; + case 1: + return SDMA1_5_0__SRCID__SDMA_TRAP; + case 2: + return SDMA2_5_0__SRCID__SDMA_TRAP; + case 3: + return SDMA3_5_0__SRCID__SDMA_TRAP; + default: + break; + } + return -EINVAL; +} + static int sdma_v5_2_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1184,32 +1221,13 @@ static int sdma_v5_2_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, - SDMA0_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, - SDMA1_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA2, - SDMA2_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid, - SDMA3_5_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v5_2_seq_to_irq_id(i), + sdma_v5_2_seq_to_trap_id(i), + &adev->sdma.trap_irq); + if (r) + return r; + } r = sdma_v5_2_init_microcode(adev); if (r) { @@ -1545,6 +1563,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); sdma_v5_2_update_medium_grain_light_sleep(adev, @@ -1572,7 +1591,7 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) *flags = 0; /* AMD_CG_SUPPORT_SDMA_LS */ - data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); + data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) *flags |= AMD_CG_SUPPORT_SDMA_LS; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9b12285177e3..1b449291f068 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev) static enum amd_reset_method si_asic_reset_method(struct amdgpu_device *adev) { + if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY && + amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + return AMD_RESET_METHOD_LEGACY; } @@ -1267,12 +1272,6 @@ static u32 si_get_xclk(struct amdgpu_device *adev) return reference_clock; } -//xxx:not implemented -static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) -{ - return 0; -} - static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) { @@ -1428,6 +1427,358 @@ static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } +static int si_uvd_send_upll_ctlreq(struct amdgpu_device *adev, + unsigned cg_upll_func_cntl) +{ + unsigned i; + + /* Make sure UPLL_CTLREQ is deasserted */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* Assert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* Wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + + if ((RREG32(cg_upll_func_cntl) & mask) == mask) + break; + mdelay(10); + } + + /* Deassert UPLL_CTLREQ */ + WREG32_P(cg_upll_func_cntl, 0, ~UPLL_CTLREQ_MASK); + + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static unsigned si_uvd_calc_upll_post_div(unsigned vco_freq, + unsigned target_freq, + unsigned pd_min, + unsigned pd_even) +{ + unsigned post_div = vco_freq / target_freq; + + /* Adjust to post divider minimum value */ + if (post_div < pd_min) + post_div = pd_min; + + /* We always need a frequency less than or equal to the target */ + if ((vco_freq / post_div) > target_freq) + post_div += 1; + + /* Post dividers above a certain value must be even */ + if (post_div > pd_even && post_div % 2) + post_div += 1; + + return post_div; +} + +/** + * si_calc_upll_dividers - calc UPLL clock dividers + * + * @adev: amdgpu_device pointer + * @vclk: wanted VCLK + * @dclk: wanted DCLK + * @vco_min: minimum VCO frequency + * @vco_max: maximum VCO frequency + * @fb_factor: factor to multiply vco freq with + * @fb_mask: limit and bitmask for feedback divider + * @pd_min: post divider minimum + * @pd_max: post divider maximum + * @pd_even: post divider must be even above this value + * @optimal_fb_div: resulting feedback divider + * @optimal_vclk_div: resulting vclk post divider + * @optimal_dclk_div: resulting dclk post divider + * + * Calculate dividers for UVD's UPLL (except APUs). + * Returns zero on success; -EINVAL on error.
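 * (Worked example with hypothetical numbers, not taken from any real clock
 * table, in the 10 kHz units the callers below use: for vclk = 54000 and
 * dclk = 40000 with pd_min = 0 and pd_even = 5, a candidate
 * vco_freq = 216000 yields vclk_div = 216000 / 54000 = 4 exactly, while
 * dclk_div starts at 216000 / 40000 = 5 and is bumped to 6 so that
 * 216000 / 6 = 36000 stays at or below dclk; the score for this candidate
 * is then (54000 - 54000) + (40000 - 36000) = 4000, and the search keeps
 * the vco_freq with the lowest such score.)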
+ */ +static int si_calc_upll_dividers(struct amdgpu_device *adev, + unsigned vclk, unsigned dclk, + unsigned vco_min, unsigned vco_max, + unsigned fb_factor, unsigned fb_mask, + unsigned pd_min, unsigned pd_max, + unsigned pd_even, + unsigned *optimal_fb_div, + unsigned *optimal_vclk_div, + unsigned *optimal_dclk_div) +{ + unsigned vco_freq, ref_freq = adev->clock.spll.reference_freq; + + /* Start off with something large */ + unsigned optimal_score = ~0; + + /* Loop through vco from low to high */ + vco_min = max(max(vco_min, vclk), dclk); + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { + uint64_t fb_div = (uint64_t)vco_freq * fb_factor; + unsigned vclk_div, dclk_div, score; + + do_div(fb_div, ref_freq); + + /* fb div out of range? */ + if (fb_div > fb_mask) + break; /* It can only get worse */ + + fb_div &= fb_mask; + + /* Calc vclk divider with current vco freq */ + vclk_div = si_uvd_calc_upll_post_div(vco_freq, vclk, + pd_min, pd_even); + if (vclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* Calc dclk divider with current vco freq */ + dclk_div = si_uvd_calc_upll_post_div(vco_freq, dclk, + pd_min, pd_even); + if (dclk_div > pd_max) + break; /* vco is too big, it has to stop */ + + /* Calc score with current vco freq */ + score = vclk - (vco_freq / vclk_div) + dclk - (vco_freq / dclk_div); + + /* Determine if this vco setting is better than current optimal settings */ + if (score < optimal_score) { + *optimal_fb_div = fb_div; + *optimal_vclk_div = vclk_div; + *optimal_dclk_div = dclk_div; + optimal_score = score; + if (optimal_score == 0) + break; /* It can't get better than this */ + } + } + + /* Did we find a valid setup? */ + if (optimal_score == ~0) + return -EINVAL; + + return 0; +} + +static int si_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + unsigned fb_div = 0, vclk_div = 0, dclk_div = 0; + int r; + + /* Bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* Put PLL in bypass mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK); + + if (!vclk || !dclk) { + /* Keep the Bypass mode */ + return 0; + } + + r = si_calc_upll_dividers(adev, vclk, dclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &vclk_div, &dclk_div); + if (r) + return r; + + /* Set RESET_ANTI_MUX to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* Set VCO_MODE to 1 */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK); + + /* Disable sleep mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK); + + /* Deassert UPLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(1); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* Assert UPLL_RESET again */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* Disable spread spectrum.
*/ + WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* Set feedback divider */ + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK); + + /* Set ref divider to 0 */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK); + + if (fb_div < 307200) + WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9); + else + WREG32_P(CG_UPLL_FUNC_CNTL_4, + UPLL_SPARE_ISPARE9, + ~UPLL_SPARE_ISPARE9); + + /* Set PDIV_A and PDIV_B */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div), + ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK)); + + /* Give the PLL some time to settle */ + mdelay(15); + + /* Deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(15); + + /* Switch from bypass mode to normal mode */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + r = si_uvd_send_upll_ctlreq(adev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* Switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} + +static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev) +{ + unsigned i; + + /* Make sure VCEPLL_CTLREQ is deasserted */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* Assert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* Wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < SI_MAX_CTLACKS_ASSERTION_WAIT; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + + if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + + /* Deassert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { + DRM_ERROR("Timeout setting VCE clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int si_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0; + int r; + + /* Bypass evclk and ecclk with bclk */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + /* Put PLL in bypass mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK, + ~VCEPLL_BYPASS_EN_MASK); + + if (!evclk || !ecclk) { + /* Keep the Bypass mode, put PLL to sleep */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + return 0; + } + + r = si_calc_upll_dividers(adev, evclk, ecclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &evclk_div, &ecclk_div); + if (r) + return r; + + /* Set RESET_ANTI_MUX to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* Set VCO_MODE to 1 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK, + ~VCEPLL_VCO_MODE_MASK); + + /* Toggle VCEPLL_SLEEP to 1 then back to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK); + + /* Deassert VCEPLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(1); + + r = si_vce_send_vcepll_ctlreq(adev); + if (r) + return r; + + /* Assert VCEPLL_RESET again */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK); + + /* Disable spread spectrum.
*/ + WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* Set feedback divider */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, + VCEPLL_FB_DIV(fb_div), + ~VCEPLL_FB_DIV_MASK); + + /* Set ref divider to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK); + + /* Set PDIV_A and PDIV_B */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div), + ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK)); + + /* Give the PLL some time to settle */ + mdelay(15); + + /* Deassert PLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(15); + + /* Switch from bypass mode to normal mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK); + + r = si_vce_send_vcepll_ctlreq(adev); + if (r) + return r; + + /* Switch EVCLK and ECCLK selection */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1438,7 +1789,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, - .set_vce_clocks = NULL, + .set_vce_clocks = &si_set_vce_clocks, .get_pcie_lanes = &si_get_pcie_lanes, .set_pcie_lanes = &si_set_pcie_lanes, .get_config_memsize = &si_get_config_memsize, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index c00ba4b23c9a..ea914b256ebd 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6953,6 +6953,24 @@ static int si_power_control_set_level(struct amdgpu_device *adev) return 0; } +static void si_set_vce_clock(struct amdgpu_device *adev, + struct amdgpu_ps *new_rps, + struct amdgpu_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) { + /* Turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) { + /* Place holder for future VCE1.0 porting to amdgpu + vce_v1_0_enable_mgcg(adev, false, false);*/ + } else { + /* Place holder for future VCE1.0 porting to amdgpu + vce_v1_0_enable_mgcg(adev, true, false); + amdgpu_asic_set_vce_clocks(adev, new_rps->evclk, new_rps->ecclk);*/ + } + } +} + static int si_dpm_set_power_state(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7029,6 +7047,7 @@ static int si_dpm_set_power_state(void *handle) return ret; } ni_set_uvd_clock_after_set_eng_clock(adev, new_ps, old_ps); + si_set_vce_clock(adev, new_ps, old_ps); if (eg_pi->pcie_performance_request) si_notify_link_speed_change_after_state_change(adev, new_ps, old_ps); ret = si_set_power_state_conditionally_enable_ulv(adev, new_ps); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index 790ba46eaebb..4e935baa7b91 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -121,7 +121,6 @@ #define CURSOR_UPDATE_LOCK (1 << 16) #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) -#define AMDGPU_NUM_OF_VMIDS 8 #define SI_CRTC0_REGISTER_OFFSET 0 #define SI_CRTC1_REGISTER_OFFSET 0x300 #define SI_CRTC2_REGISTER_OFFSET 0x2600 diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 5f660f0c819f..9a39cbfe6db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -47,8 +47,7 @@ #define SI_MAX_LDS_NUM 0xFFFF #define
SI_MAX_TCC 16 #define SI_MAX_TCC_MASK 0xFFFF - -#define AMDGPU_NUM_OF_VMIDS 8 +#define SI_MAX_CTLACKS_ASSERTION_WAIT 100 /* SMC IND accessor regs */ #define SMC_IND_INDEX_0 0x80 @@ -2460,4 +2459,36 @@ #define MC_VM_FB_OFFSET 0x81a +/* Discrete VCE clocks */ +#define CG_VCEPLL_FUNC_CNTL 0xc0030600 +#define VCEPLL_RESET_MASK 0x00000001 +#define VCEPLL_SLEEP_MASK 0x00000002 +#define VCEPLL_BYPASS_EN_MASK 0x00000004 +#define VCEPLL_CTLREQ_MASK 0x00000008 +#define VCEPLL_VCO_MODE_MASK 0x00000600 +#define VCEPLL_REF_DIV_MASK 0x003F0000 +#define VCEPLL_CTLACK_MASK 0x40000000 +#define VCEPLL_CTLACK2_MASK 0x80000000 + +#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601 +#define VCEPLL_PDIV_A(x) ((x) << 0) +#define VCEPLL_PDIV_A_MASK 0x0000007F +#define VCEPLL_PDIV_B(x) ((x) << 8) +#define VCEPLL_PDIV_B_MASK 0x00007F00 +#define EVCLK_SRC_SEL(x) ((x) << 20) +#define EVCLK_SRC_SEL_MASK 0x01F00000 +#define ECCLK_SRC_SEL(x) ((x) << 25) +#define ECCLK_SRC_SEL_MASK 0x3E000000 + +#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602 +#define VCEPLL_FB_DIV(x) ((x) << 0) +#define VCEPLL_FB_DIV_MASK 0x01FFFFFF + +#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603 + +#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604 +#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606 +#define VCEPLL_SSEN_MASK 0x00000001 + + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 3e406eeeaff6..84d811b6e48b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -532,6 +532,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) bool baco_reset = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: @@ -669,7 +678,7 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) return adev->nbio.funcs->get_rev_id(adev); } -int soc15_set_ip_blocks(struct amdgpu_device *adev) +static void soc15_reg_base_init(struct amdgpu_device *adev) { int r; @@ -681,6 +690,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) vega10_reg_base_init(adev); break; case CHIP_RENOIR: + /* It's safe to do ip discovery here for Renoir, + * since it doesn't support SRIOV. */ if (amdgpu_discovery) { r = amdgpu_discovery_reg_base_init(adev); if (r) { @@ -697,8 +708,26 @@ arct_reg_base_init(adev); break; default: - return -EINVAL; + DRM_ERROR("Unsupported asic type: %d!\n", adev->asic_type); + break; } +} + +void soc15_set_virt_ops(struct amdgpu_device *adev) +{ + adev->virt.ops = &xgpu_ai_virt_ops; + + /* init soc15 reg base early enough so we can + * request full access for sriov before + * set_ip_blocks.
*/ + soc15_reg_base_init(adev); +} + +int soc15_set_ip_blocks(struct amdgpu_device *adev) +{ + /* for bare metal case */ + if (!amdgpu_sriov_vf(adev)) + soc15_reg_base_init(adev); if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; @@ -722,9 +751,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->rev_id = soc15_get_rev_id(adev); - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_ai_virt_ops; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index b03f950c486c..8f38f047265b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -90,6 +90,7 @@ struct soc15_ras_field_entry { void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void soc15_set_virt_ops(struct amdgpu_device *adev); int soc15_set_ip_blocks(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 7a55457e6f9e..e07e3fae99b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1375,7 +1375,7 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1417,7 +1417,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index a0fb119240f4..37fa163393fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -991,7 +991,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 6dcc3ce0c00a..927c330fad21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1539,7 +1539,7 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1679,7 +1679,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + 
vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c0e4133a6dd5..23a9eb5b2c8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1505,7 +1505,7 @@ void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for register write */ - data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); @@ -1660,7 +1660,8 @@ void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* wait for reg writes */ - vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + + vmid * hub->ctx_addr_distance, lower_32_bits(pd_addr), 0xffffffff); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 90fe95f345e3..910a4a32ff78 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -28,6 +28,7 @@ #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" +#include "mmsch_v3_0.h" #include "vcn/vcn_3_0_0_offset.h" #include "vcn/vcn_3_0_0_sh_mask.h" @@ -48,6 +49,17 @@ #define VCN_INSTANCES_SIENNA_CICHLID 2 +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static int amdgpu_ucode_id_vcns[] = { + AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1 +}; + +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); @@ -56,10 +68,8 @@ static int vcn_v3_0_set_powergating_state(void *handle, static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); -static int amdgpu_ih_clientid_vcns[] = { - SOC15_IH_CLIENTID_VCN, - SOC15_IH_CLIENTID_VCN1 -}; +static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); /** * vcn_v3_0_early_init - set function pointers @@ -71,25 +81,33 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID) { - u32 harvest; - int i; + if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); - if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) - adev->vcn.harvest_config |= 1 << i; - } + adev->vcn.harvest_config = 0; + adev->vcn.num_enc_rings = 1; + + } else { + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + u32 harvest; + int i; + + adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); + if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) + adev->vcn.harvest_config |= 1 << i; + } - if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | - AMDGPU_VCN_HARVEST_VCN1)) - /* both instances 
are harvested, disable the block */ - return -ENOENT; - } else - adev->vcn.num_vcn_inst = 1; + if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | + AMDGPU_VCN_HARVEST_VCN1)) + /* both instances are harvested, disable the block */ + return -ENOENT; + } else + adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 2; + adev->vcn.num_enc_rings = 2; + } vcn_v3_0_set_dec_ring_funcs(adev); vcn_v3_0_set_enc_ring_funcs(adev); @@ -109,6 +127,7 @@ static int vcn_v3_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; + int vcn_doorbell_index = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; r = amdgpu_vcn_sw_init(adev); @@ -136,6 +155,12 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; + if (amdgpu_sriov_vf(adev)) { + vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1; + /* get DWORD offset */ + vcn_doorbell_index = vcn_doorbell_index << 1; + } + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; @@ -166,7 +191,13 @@ static int vcn_v3_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; + if (amdgpu_sriov_vf(adev)) { + ring->doorbell_index = vcn_doorbell_index; + /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ + vcn_doorbell_index++; + } else { + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; + } if (i != 0) ring->no_scheduler = true; sprintf(ring->name, "vcn_dec_%d", i); @@ -184,7 +215,13 @@ static int vcn_v3_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[j]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; + if (amdgpu_sriov_vf(adev)) { + ring->doorbell_index = vcn_doorbell_index; + /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ + vcn_doorbell_index++; + } else { + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; + } if (i != 1) ring->no_scheduler = true; sprintf(ring->name, "vcn_enc_%d.%d", i, j); @@ -195,6 +232,11 @@ static int vcn_v3_0_sw_init(void *handle) } } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; @@ -213,6 +255,9 @@ static int vcn_v3_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -235,24 +280,50 @@ static int vcn_v3_0_hw_init(void *handle) struct amdgpu_ring *ring; int i, j, r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v3_0_start_sriov(adev); + if (r) + goto done; - ring = &adev->vcn.inst[i].ring_dec; + /* initialize VCN dec and enc ring buffers */ + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_dec_ring_set_wptr(ring); + ring->sched.ready = true; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_enc_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + 
continue; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index, i); + ring = &adev->vcn.inst[i].ring_dec; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index, i); - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) goto done; + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } } } @@ -283,11 +354,13 @@ static int vcn_v3_0_hw_fini(void *handle) ring = &adev->vcn.inst[i].ring_dec; - if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, mmUVD_STATUS))) - vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } ring->sched.ready = false; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { @@ -1137,6 +1210,221 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) +{ + int i, j; + struct amdgpu_ring *ring; + uint64_t cache_addr; + uint64_t rb_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + uint32_t id; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + + struct mmsch_v3_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v3_0_cmd_direct_polling + direct_poll = { {0} }; + struct mmsch_v3_0_cmd_end end = { {0} }; + struct mmsch_v3_0_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2; + for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + header.inst[i].init_status = 0; + header.inst[i].table_offset = 0; + header.inst[i].table_size = 0; + } + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + table_size = 0; + + MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + id = amdgpu_ucode_id_vcns[i]; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[id].tmr_mc_addr_lo); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[id].tmr_mc_addr_hi); + offset = 0; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + 0); + } else { + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + 
lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET1), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE1), + AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[i].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(cache_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_OFFSET2), + 0); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_VCN_CONTEXT_SIZE); + + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_LO), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_BASE_HI), + upper_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RB_SIZE), + ring->ring_size / 4); + } + + ring = &adev->vcn.inst[i].ring_dec; + ring->wptr = 0; + rb_addr = ring->gpu_addr; + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(rb_addr)); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(rb_addr)); + /* force RBC into idle state */ + tmp = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, + mmUVD_RBC_RB_CNTL), + tmp); + + /* add end packet */ + MMSCH_V3_0_INSERT_END(); + + /* refine header */ + header.inst[i].init_status = 1; + header.inst[i].table_offset = header.total_size; + header.inst[i].table_size = table_size; + header.total_size += table_size; + } + + /* Update init table header in memory */ + size = sizeof(struct mmsch_v3_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + /* message MMSCH (in VCN[0]) to initialize this client + * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr + * of memory descriptor location + */ + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + /* 2, update vmid of descriptor */ + tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID); + tmp &= 
~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + /* use domain0 for MM scheduler */ + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp); + + /* 3, notify mmsch about the size of this descriptor */ + size = header.total_size; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size); + + /* 4, set resp to zero */ + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0); + + /* 5, kick off the initialization and wait until + * MMSCH_VF_MAILBOX_RESP becomes non-zero + */ + param = 0x10000001; + WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = param + 1; + while (resp != expected) { + resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP); + if (resp == expected) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for mmMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + return 0; +} + static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1575,6 +1863,15 @@ static int vcn_v3_0_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, the guest should not control VCN power-gating; + * MMSCH FW controls power-gating and clock-gating, so the + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if(state == adev->vcn.cur_state) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index af8986a55354..f6f2ed0830b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -710,6 +710,14 @@ vi_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: @@ -1705,11 +1713,13 @@ static const struct amdgpu_ip_block_version vi_common_ip_block = .funcs = &vi_common_ip_funcs, }; -int vi_set_ip_blocks(struct amdgpu_device *adev) +void vi_set_virt_ops(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) - adev->virt.ops = &xgpu_vi_virt_ops; + adev->virt.ops = &xgpu_vi_virt_ops; +} +int vi_set_ip_blocks(struct amdgpu_device *adev) +{ switch (adev->asic_type) { case CHIP_TOPAZ: /* topaz has no DCE, UVD, VCE */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index defb4aaf929a..9718f98f8533 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -28,6 +28,7 @@ void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); +void vi_set_virt_ops(struct amdgpu_device *adev); int vi_set_ip_blocks(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 7a01e6133798..80ce42aacc0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -67,8 +67,6 @@ #define HPD4_REGISTER_OFFSET (0x18b8 - 0x1898) #define HPD5_REGISTER_OFFSET (0x18c0 - 0x1898) -#define AMDGPU_NUM_OF_VMIDS 8 - #define PIPEID(x) ((x) << 0) #define MEID(x) ((x) << 2) #define VMID(x) ((x) << 4) diff
--git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 61474627a32c..e1e4115dcf78 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_int_process_v9.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ + $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 9f59ba93cfe0..24b471734117 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "cik_int.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { struct kfd_vm_fault_info info; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cf0017f4d9d5..e9b96ad3d9a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -39,6 +39,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "amdgpu_amdkfd.h" +#include "kfd_smi_events.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -1740,6 +1741,20 @@ err_unlock: return r; } +/* Handle requests for watching SMI events */ +static int kfd_ioctl_smi_events(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_smi_events_args *args = data; + struct kfd_dev *dev; + + dev = kfd_device_by_id(args->gpuid); + if (!dev) + return -EINVAL; + + return kfd_smi_event_open(dev, &args->anon_fd); +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1835,6 +1850,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, kfd_ioctl_alloc_queue_gws, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, + kfd_ioctl_smi_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 9deadfd8f929..6a250f8fcfb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -679,6 +679,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, case CHIP_NAVI12: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: pcache_info = navi10_cache_info; num_of_cache_types = ARRAY_SIZE(navi10_cache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 7f6d0958ed62..4bfedaab183f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -74,6 +74,7 @@ static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { [CHIP_NAVI12] = &gfx_v10_kfd2kgd, [CHIP_NAVI14] = &gfx_v10_kfd2kgd, [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd, + [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd, }; #ifdef KFD_SUPPORT_IOMMU_V2 @@ -478,6 +479,24 @@ static const struct kfd_device_info 
sienna_cichlid_device_info = { .num_sdma_queues_per_engine = 8, }; +static const struct kfd_device_info navy_flounder_device_info = { + .asic_family = CHIP_NAVY_FLOUNDER, + .asic_name = "navy_flounder", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .needs_iommu_device = false, + .supports_cwsr = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 8, +}; + /* For each entry, [0] is regular and [1] is virtualisation device. */ static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 @@ -501,6 +520,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, [CHIP_NAVI14] = {&navi14_device_info, NULL}, [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info}, + [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info}, }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -602,15 +622,24 @@ static int kfd_gws_init(struct kfd_dev *kfd) return 0; if (hws_gws_support - || (kfd->device_info->asic_family >= CHIP_VEGA10 + || (kfd->device_info->asic_family == CHIP_VEGA10 + && kfd->mec2_fw_version >= 0x81b3) + || (kfd->device_info->asic_family >= CHIP_VEGA12 && kfd->device_info->asic_family <= CHIP_RAVEN - && kfd->mec2_fw_version >= 0x1b3)) + && kfd->mec2_fw_version >= 0x1b3) + || (kfd->device_info->asic_family == CHIP_ARCTURUS + && kfd->mec2_fw_version >= 0x30)) ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); return ret; } +static void kfd_smi_init(struct kfd_dev *dev) { + INIT_LIST_HEAD(&dev->smi_clients); + spin_lock_init(&dev->smi_lock); +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) @@ -725,6 +754,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } + kfd_smi_init(kfd); + kfd->init_complete = true; dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index dd550025d1c1..e0e60b0d0669 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1939,6 +1939,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_NAVI12: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: device_queue_manager_init_v10_navi10(&dqm->asic_ops); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index b4674cf73132..c1166c40ac15 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -416,6 +416,7 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_NAVI12: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: kfd_init_apertures_v9(pdd, id); break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index fce6ccabe38b..241bd6ff79f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -24,6 +24,7 @@ #include "kfd_events.h" #include "soc15_int.h" #include "kfd_device_queue_manager.h" +#include "kfd_smi_events.h" static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, info.prot_read = ring_id & 0x10; info.prot_write = ring_id & 0x20; + kfd_smi_event_update_vmfault(dev, pasid); kfd_process_vm_fault(dev->dqm, pasid); kfd_signal_vm_fault_event(dev, pasid, &info); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index d1d68a51bfb8..18e08d82d978 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -113,7 +113,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | - 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 48cda3073b70..3b6f5963180d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -160,7 +160,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | - 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index a5e8ff1e5945..31799e5f3b3c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -117,7 +117,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | - 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; set_priority(m, q); m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 89d7f08d749f..47ee40fbbd86 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -246,6 +246,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_NAVI12: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: pm->pmf = &kfd_v9_pm_funcs; break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index bdca9dc5f118..dfaf771a42e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -39,7 +39,7 @@ static int pm_map_process_v9(struct packet_manager *pm, packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 
1 : 0; - packet->bitfields2.process_quantum = 1; + packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; packet->bitfields14.gds_size = qpd->gds_size & 0x3F; packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index bed4d0ccb6b1..a852e0d7d804 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -50,7 +50,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, sizeof(struct pm4_mes_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; + packet->bitfields2.process_quantum = 10; packet->bitfields2.pasid = qpd->pqm->process->pasid; packet->bitfields3.page_table_base = qpd->page_table_base; packet->bitfields10.gds_size = qpd->gds_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 33b08ff00b50..2a07c4f2cd0d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -25,7 +25,7 @@ #include "amdgpu_ids.h" static unsigned int pasid_bits = 16; -static const struct kfd2kgd_calls *kfd2kgd; +static bool pasids_allocated; /* = false */ bool kfd_set_pasid_limit(unsigned int new_limit) { @@ -33,7 +33,7 @@ bool kfd_set_pasid_limit(unsigned int new_limit) return false; if (new_limit < (1U << pasid_bits)) { - if (kfd2kgd) + if (pasids_allocated) /* We've already allocated user PASIDs, too late to * change the limit */ @@ -53,32 +53,17 @@ unsigned int kfd_get_pasid_limit(void) unsigned int kfd_pasid_alloc(void) { - int r; + int r = amdgpu_pasid_alloc(pasid_bits); - /* Find the first best KFD device for calling KGD */ - if (!kfd2kgd) { - struct kfd_dev *dev = NULL; - unsigned int i = 0; - - while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) { - if (dev && dev->kfd2kgd) { - kfd2kgd = dev->kfd2kgd; - break; - } - i++; - } - - if (!kfd2kgd) - return false; + if (r > 0) { + pasids_allocated = true; + return r; } - r = amdgpu_pasid_alloc(pasid_bits); - - return r > 0 ? r : 0; + return 0; } void kfd_pasid_free(unsigned int pasid) { - if (kfd2kgd) - amdgpu_pasid_free(pasid); + amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 58de109d2909..6727e9de5b8b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -97,7 +97,7 @@ * Size of the per-process TBA+TMA buffer: 2 pages * * The first page is the TBA used for the CWSR ISA code. The second - * page is used as TMA for daisy changing a user-mode trap handler. + * page is used as TMA for user-mode trap handler setup in daisy-chain mode. 
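+ *
+ * A sketch of the layout implied by the constants just below (illustrative,
+ * not normative):
+ *
+ *   tba = cwsr_base + 0                     CWSR trap handler ISA code
+ *   tma = cwsr_base + KFD_CWSR_TMA_OFFSET   per-process trap handler data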
*/ #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) #define KFD_CWSR_TMA_OFFSET PAGE_SIZE @@ -157,29 +157,19 @@ extern int debug_largebar; */ extern int ignore_crat; -/* - * Set sh_mem_config.retry_disable on Vega10 - */ +/* Set sh_mem_config.retry_disable on GFX v9 */ extern int amdgpu_noretry; -/* - * Halt if HWS hang is detected - */ +/* Halt if HWS hang is detected */ extern int halt_if_hws_hang; -/* - * Whether MEC FW support GWS barriers - */ +/* Whether MEC FW support GWS barriers */ extern bool hws_gws_support; -/* - * Queue preemption timeout in ms - */ +/* Queue preemption timeout in ms */ extern int queue_preemption_timeout_ms; -/* - * Enable eviction debug messages - */ +/* Enable eviction debug messages */ extern bool debug_evictions; enum cache_policy { @@ -301,7 +291,7 @@ struct kfd_dev { /* xGMI */ uint64_t hive_id; - + /* UUID */ uint64_t unique_id; @@ -313,8 +303,12 @@ struct kfd_dev { /* Compute Profile ref. count */ atomic_t compute_profile; - /* Global GWS resource shared b/t processes*/ + /* Global GWS resource shared between processes */ void *gws; + + /* Clients watching SMI events */ + struct list_head smi_clients; + spinlock_t smi_lock; }; enum kfd_mempool { @@ -329,7 +323,7 @@ void kfd_chardev_exit(void); struct device *kfd_chardev(void); /** - * enum kfd_unmap_queues_filter + * enum kfd_unmap_queues_filter - Enum for queue filters. * * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue. * @@ -348,15 +342,17 @@ enum kfd_unmap_queues_filter { }; /** - * enum kfd_queue_type + * enum kfd_queue_type - Enum for various queue types. * * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. * - * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. + * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type. * * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. * * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. + * + * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. */ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, @@ -402,9 +398,9 @@ enum KFD_QUEUE_PRIORITY { * * @write_ptr: Defines the number of dwords written to the ring buffer. * - * @doorbell_ptr: This field aim is to notify the H/W of new packet written to - * the queue ring buffer. This field should be similar to write_ptr and the - * user should update this field after he updated the write_ptr. + * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring + * buffer. This field should be similar to write_ptr and the user should + * update this field after updating the write_ptr. * * @doorbell_off: The doorbell offset in the doorbell pci-bar. * @@ -473,7 +469,7 @@ struct queue_properties { * * @list: Queue linked list. * - * @mqd: The queue MQD. + * @mqd: The queue MQD (memory queue descriptor). * * @mqd_mem_obj: The MQD local gpu memory object. * @@ -482,7 +478,7 @@ struct queue_properties { * @properties: The queue properties. * * @mec: Used only in no cp scheduling mode and identifies to micro engine id - * that the queue should be execute on. + * that the queue should be executed on. * * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe * id. @@ -523,9 +519,6 @@ struct queue { struct kobject kobj; }; -/* - * Please read the kfd_mqd_manager.h description. 
- */ enum KFD_MQD_TYPE { KFD_MQD_TYPE_HIQ = 0, /* for hiq */ KFD_MQD_TYPE_CP, /* for cp queues and diq */ @@ -583,9 +576,7 @@ struct qcm_process_device { */ bool mapped_gws_queue; - /* - * All the memory management data should be here too - */ + /* All the memory management data should be here too */ uint64_t gds_context_area; /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */ uint64_t page_table_base; @@ -785,11 +776,13 @@ extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); extern struct srcu_struct kfd_processes_srcu; /** - * Ioctl function type. + * typedef amdkfd_ioctl_t - typedef for ioctl function pointer. + * + * @filep: pointer to file structure. + * @p: amdkfd process pointer. + * @data: pointer to arg that was copied from user. * - * \param filep pointer to file structure. - * \param p amdkfd process pointer. - * \param data pointer to arg that was copied from user. + * Return: returns ioctl completion code. */ typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, void *data); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c new file mode 100644 index 000000000000..7b348bf9df21 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -0,0 +1,227 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/poll.h> +#include <linux/wait.h> +#include <linux/anon_inodes.h> +#include <uapi/linux/kfd_ioctl.h> +#include "amdgpu_vm.h" +#include "kfd_priv.h" +#include "kfd_smi_events.h" + +struct kfd_smi_client { + struct list_head list; + struct kfifo fifo; + wait_queue_head_t wait_queue; + /* events enabled */ + uint64_t events; + struct kfd_dev *dev; + spinlock_t lock; +}; + +#define MAX_KFIFO_SIZE 1024 + +static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *); +static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t, + loff_t *); +static int kfd_smi_ev_release(struct inode *, struct file *); + +static const char kfd_smi_name[] = "kfd_smi_ev"; + +static const struct file_operations kfd_smi_ev_fops = { + .owner = THIS_MODULE, + .poll = kfd_smi_ev_poll, + .read = kfd_smi_ev_read, + .write = kfd_smi_ev_write, + .release = kfd_smi_ev_release +}; + +static __poll_t kfd_smi_ev_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct kfd_smi_client *client = filep->private_data; + __poll_t mask = 0; + + poll_wait(filep, &client->wait_queue, wait); + + spin_lock(&client->lock); + if (!kfifo_is_empty(&client->fifo)) + mask = EPOLLIN | EPOLLRDNORM; + spin_unlock(&client->lock); + + return mask; +} + +static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user, + size_t size, loff_t *offset) +{ + int ret; + size_t to_copy; + struct kfd_smi_client *client = filep->private_data; + unsigned char *buf; + + buf = kmalloc(MAX_KFIFO_SIZE * sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* kfifo_to_user can sleep so we can't use spinlock protection around + * it. Instead, we kfifo out as spinlocked then copy them to the user. 
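+ *
+ * I.e. the pattern is (sketch):
+ *
+ *   spin_lock(&client->lock);
+ *   len = kfifo_out(&client->fifo, buf, len);   cannot sleep, lock held
+ *   spin_unlock(&client->lock);
+ *   copy_to_user(user, buf, len);               may sleep, lock dropped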
+ */ + spin_lock(&client->lock); + to_copy = kfifo_len(&client->fifo); + if (!to_copy) { + spin_unlock(&client->lock); + ret = -EAGAIN; + goto ret_err; + } + to_copy = min3(size, sizeof(buf), to_copy); + ret = kfifo_out(&client->fifo, buf, to_copy); + spin_unlock(&client->lock); + if (ret <= 0) { + ret = -EAGAIN; + goto ret_err; + } + + ret = copy_to_user(user, buf, to_copy); + if (ret) { + ret = -EFAULT; + goto ret_err; + } + + kfree(buf); + return to_copy; + +ret_err: + kfree(buf); + return ret; +} + +static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user, + size_t size, loff_t *offset) +{ + struct kfd_smi_client *client = filep->private_data; + uint64_t events; + + if (!access_ok(user, size) || size < sizeof(events)) + return -EFAULT; + if (copy_from_user(&events, user, sizeof(events))) + return -EFAULT; + + WRITE_ONCE(client->events, events); + + return sizeof(events); +} + +static int kfd_smi_ev_release(struct inode *inode, struct file *filep) +{ + struct kfd_smi_client *client = filep->private_data; + struct kfd_dev *dev = client->dev; + + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + kfifo_free(&client->fifo); + kfree(client); + + return 0; +} + +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; + struct amdgpu_task_info task_info; + /* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */ + /* 16 bytes event + 1 byte space + 25 bytes msg + 1 byte \n = 43 + */ + char fifo_in[43]; + struct kfd_smi_client *client; + int len; + + if (list_empty(&dev->smi_clients)) + return; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, pasid, &task_info); + /* Report VM faults from user applications, not retry from kernel */ + if (!task_info.pid) + return; + + len = snprintf(fifo_in, 43, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT, + task_info.pid, task_info.task_name); + + rcu_read_lock(); + + list_for_each_entry_rcu(client, &dev->smi_clients, list) { + if (!(READ_ONCE(client->events) & KFD_SMI_EVENT_VMFAULT)) + continue; + spin_lock(&client->lock); + if (kfifo_avail(&client->fifo) >= len) { + kfifo_in(&client->fifo, fifo_in, len); + wake_up_all(&client->wait_queue); + } + else + pr_debug("smi_event(vmfault): no space left\n"); + spin_unlock(&client->lock); + } + + rcu_read_unlock(); +} + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) +{ + struct kfd_smi_client *client; + int ret; + + client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL); + if (!client) + return -ENOMEM; + INIT_LIST_HEAD(&client->list); + + ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL); + if (ret) { + kfree(client); + return ret; + } + + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + + init_waitqueue_head(&client->wait_queue); + spin_lock_init(&client->lock); + client->events = 0; + client->dev = dev; + + spin_lock(&dev->smi_lock); + list_add_rcu(&client->list, &dev->smi_clients); + spin_unlock(&dev->smi_lock); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h new file mode 100644 index 000000000000..a9cb218fef96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -0,0 +1,29 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_SMI_EVENTS_H_INCLUDED +#define KFD_SMI_EVENTS_H_INCLUDED + +int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); +void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cd18baf62727..f185f6cbc05c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1374,6 +1374,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_NAVI12: case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5b0f708dd8c5..d8224156460e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -978,6 +978,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* Update the actual used number of crtc */ adev->mode_info.num_crtc = adev->dm.display_indexes_num; + /* create fake encoders for MST */ + dm_dp_create_fake_mst_encoders(adev); + /* TODO: Add_display_info? 
*/ /* TODO use dynamic cursor width */ @@ -1001,6 +1004,12 @@ error: static void amdgpu_dm_fini(struct amdgpu_device *adev) { + int i; + + for (i = 0; i < adev->dm.display_indexes_num; i++) { + drm_encoder_cleanup(&adev->dm.mst_encoders[i].base); + } + amdgpu_dm_audio_fini(adev); amdgpu_dm_destroy_drm_device(&adev->dm); @@ -1076,6 +1085,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case CHIP_RENOIR: #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: #endif return 0; case CHIP_NAVI12: @@ -1175,6 +1185,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev) break; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: dmub_asic = DMUB_ASIC_DCN30; fw_name_dmub = FIRMWARE_SIENNA_CICHLID_DMUB; break; @@ -2019,6 +2030,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) struct amdgpu_display_manager *dm; struct drm_connector *conn_base; struct amdgpu_device *adev; + struct dc_link *link = NULL; static const u8 pre_computed_values[] = { 50, 51, 52, 53, 55, 56, 57, 58, 59, 61, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 77, 79, 81, 82, 84, 86, 88, 90, 92, 94, 96, 98}; @@ -2026,6 +2038,10 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) if (!aconnector || !aconnector->dc_link) return; + link = aconnector->dc_link; + if (link->connector_signal != SIGNAL_TYPE_EDP) + return; + conn_base = &aconnector->base; adev = conn_base->dev->dev_private; dm = &adev->dm; @@ -3216,6 +3232,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_RENOIR: #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: #endif if (dcn10_register_irq_handlers(dm->adev)) { DRM_ERROR("DM: Failed to initialize IRQ\n"); @@ -3373,6 +3390,7 @@ static int dm_early_init(void *handle) case CHIP_NAVI12: #if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: #endif adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; @@ -3696,6 +3714,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, adev->asic_type == CHIP_NAVI12 || #if defined(CONFIG_DRM_AMD_DC_DCN3_0) adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER || #endif adev->asic_type == CHIP_RENOIR || adev->asic_type == CHIP_RAVEN) { @@ -3717,9 +3736,9 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, tiling_info->gfx9.shaderEnable = 1; #ifdef CONFIG_DRM_AMD_DC_DCN3_0 - if (adev->asic_type == CHIP_SIENNA_CICHLID) + if (adev->asic_type == CHIP_SIENNA_CICHLID || + adev->asic_type == CHIP_NAVY_FLOUNDER) tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; - #endif ret = fill_plane_dcc_attributes(adev, afb, format, rotation, plane_size, tiling_info, @@ -4556,24 +4575,20 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket, false, false); - if (stream->link->psr_settings.psr_feature_enabled) { - struct dc *core_dc = stream->link->ctx->dc; - - if (dc_is_dmcu_initialized(core_dc)) { - // - // should decide stream support vsc sdp colorimetry capability - // before building vsc info packet - // - stream->use_vsc_sdp_for_colorimetry = false; - if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { - stream->use_vsc_sdp_for_colorimetry = - aconnector->dc_sink->is_vsc_sdp_colorimetry_supported; - } else { - if 
(stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) - stream->use_vsc_sdp_for_colorimetry = true; - } - mod_build_vsc_infopacket(stream, &stream->vsc_infopacket); + if (stream->link->psr_settings.psr_feature_enabled) { + // + // should decide stream support vsc sdp colorimetry capability + // before building vsc info packet + // + stream->use_vsc_sdp_for_colorimetry = false; + if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { + stream->use_vsc_sdp_for_colorimetry = + aconnector->dc_sink->is_vsc_sdp_colorimetry_supported; + } else { + if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) + stream->use_vsc_sdp_for_colorimetry = true; } + mod_build_vsc_infopacket(stream, &stream->vsc_infopacket); } finish: dc_sink_release(sink); @@ -4639,7 +4654,6 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) } state->active_planes = cur->active_planes; - state->interrupts_enabled = cur->interrupts_enabled; state->vrr_params = cur->vrr_params; state->vrr_infopacket = cur->vrr_infopacket; state->abm_level = cur->abm_level; @@ -5300,29 +5314,19 @@ static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) return num_active; } -/* - * Sets whether interrupts should be enabled on a specific CRTC. - * We require that the stream be enabled and that there exist active - * DC planes on the stream. - */ -static void -dm_update_crtc_interrupt_state(struct drm_crtc *crtc, - struct drm_crtc_state *new_crtc_state) +static void dm_update_crtc_active_planes(struct drm_crtc *crtc, + struct drm_crtc_state *new_crtc_state) { struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_new_crtc_state->active_planes = 0; - dm_new_crtc_state->interrupts_enabled = false; if (!dm_new_crtc_state->stream) return; dm_new_crtc_state->active_planes = count_crtc_active_planes(new_crtc_state); - - dm_new_crtc_state->interrupts_enabled = - dm_new_crtc_state->active_planes > 0; } static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, @@ -5333,13 +5337,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(state); int ret = -EINVAL; - /* - * Update interrupt state for the CRTC. This needs to happen whenever - * the CRTC has changed or whenever any of its planes have changed. - * Atomic check satisfies both of these requirements since the CRTC - * is added to the state by DRM during drm_atomic_helper_check_planes. 
- */ - dm_update_crtc_interrupt_state(crtc, state); + dm_update_crtc_active_planes(crtc, state); if (unlikely(!dm_crtc_state->stream && modeset_required(state, NULL, dm_crtc_state->stream))) { @@ -5864,6 +5862,7 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, uint32_t formats[32]; int num_formats; int res = -EPERM; + unsigned int supported_rotations; num_formats = get_plane_formats(plane, plane_cap, formats, ARRAY_SIZE(formats)); @@ -5898,6 +5897,13 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); } + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; + + drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, + supported_rotations); + drm_plane_helper_add(plane, &dm_plane_helper_funcs); /* Create (reset) the plane state */ @@ -6442,8 +6448,10 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, bool enable) { /* - * this is not correct translation but will work as soon as VBLANK - * constant is the same as PFLIP + * We have no guarantee that the frontend index maps to the same + * backend index - some even map to more than one. + * + * TODO: Use a different interrupt or check DC itself for the mapping. */ int irq_type = amdgpu_display_crtc_idx_to_irq_type( @@ -6466,6 +6474,19 @@ static void manage_dm_interrupts(struct amdgpu_device *adev, } } +static void dm_update_pflip_irq_state(struct amdgpu_device *adev, + struct amdgpu_crtc *acrtc) +{ + int irq_type = + amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); + + /** + * This reads the current state for the IRQ and force reapplies + * the setting to hardware. + */ + amdgpu_irq_update(adev, &adev->pageflip_irq, irq_type); +} + static bool is_scaling_state_different(const struct dm_connector_state *dm_state, const struct dm_connector_state *old_dm_state) @@ -7050,7 +7071,16 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, usleep_range(1000, 1100); } - if (acrtc_attach->base.state->event) { + /** + * Prepare the flip event for the pageflip interrupt to handle. + * + * This only works in the case where we've already turned on the + * appropriate hardware blocks (eg. HUBP) so in the transition case + * from 0 -> n planes we have to skip a hardware generated event + * and rely on sending it from software. + */ + if (acrtc_attach->base.state->event && + acrtc_state->active_planes > 0) { drm_crtc_vblank_get(pcrtc); spin_lock_irqsave(&pcrtc->dev->event_lock, flags); @@ -7119,6 +7149,24 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &bundle->stream_update, dc_state); + /** + * Enable or disable the interrupts on the backend. + * + * Most pipes are put into power gating when unused. + * + * When power gating is enabled on a pipe we lose the + * interrupt enablement state when power gating is disabled. + * + * So we need to update the IRQ control state in hardware + * whenever the pipe turns on (since it could be previously + * power gated) or off (since some pipes can't be power gated + * on some ASICs). 
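+ *
+ * dm_update_pflip_irq_state() does this by letting amdgpu_irq_update()
+ * re-read the stored enable state and force-write it back to the
+ * (possibly just ungated) registers.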
+ */ + if (dm_old_crtc_state->active_planes != acrtc_state->active_planes) + dm_update_pflip_irq_state( + (struct amdgpu_device *)dev->dev_private, + acrtc_attach); + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && acrtc_state->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED && !acrtc_state->stream->link->psr_settings.psr_feature_enabled) @@ -7220,64 +7268,6 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev, } /* - * Enable interrupts on CRTCs that are newly active, undergone - * a modeset, or have active planes again. - * - * Done in two passes, based on the for_modeset flag: - * Pass 1: For CRTCs going through modeset - * Pass 2: For CRTCs going from 0 to n active planes - * - * Interrupts can only be enabled after the planes are programmed, - * so this requires a two-pass approach since we don't want to - * just defer the interrupts until after commit planes every time. - */ -static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev, - struct drm_atomic_state *state, - bool for_modeset) -{ - struct amdgpu_device *adev = dev->dev_private; - struct drm_crtc *crtc; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - int i; -#ifdef CONFIG_DEBUG_FS - enum amdgpu_dm_pipe_crc_source source; -#endif - - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, - new_crtc_state, i) { - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct dm_crtc_state *dm_new_crtc_state = - to_dm_crtc_state(new_crtc_state); - struct dm_crtc_state *dm_old_crtc_state = - to_dm_crtc_state(old_crtc_state); - bool modeset = drm_atomic_crtc_needs_modeset(new_crtc_state); - bool run_pass; - - run_pass = (for_modeset && modeset) || - (!for_modeset && !modeset && - !dm_old_crtc_state->interrupts_enabled); - - if (!run_pass) - continue; - - if (!dm_new_crtc_state->interrupts_enabled) - continue; - - manage_dm_interrupts(adev, acrtc, true); - -#ifdef CONFIG_DEBUG_FS - /* The stream has changed so CRC capture needs to re-enabled. */ - source = dm_new_crtc_state->crc_src; - if (amdgpu_dm_is_valid_crc_source(source)) { - amdgpu_dm_crtc_configure_crc_source( - crtc, dm_new_crtc_state, - dm_new_crtc_state->crc_src); - } -#endif - } -} - -/* * amdgpu_dm_crtc_copy_transient_flags - copy mirrored flags from DRM to DC * @crtc_state: the DRM CRTC state * @stream_state: the DC stream state. @@ -7316,12 +7306,10 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev, * in atomic check. */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - if (dm_old_crtc_state->interrupts_enabled && - (!dm_new_crtc_state->interrupts_enabled || + if (old_crtc_state->active && + (!new_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state))) manage_dm_interrupts(adev, acrtc, false); } @@ -7600,8 +7588,34 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm_new_crtc_state); } - /* Enable interrupts for CRTCs going through a modeset. */ - amdgpu_dm_enable_crtc_interrupts(dev, state, true); + /** + * Enable interrupts for CRTCs that are newly enabled or went through + * a modeset. It was intentionally deferred until after the front end + * state was modified to wait until the OTG was on and so the IRQ + * handlers didn't access stale or invalid state. 
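+ *
+ * Together with the disable pass in amdgpu_dm_atomic_commit() the two
+ * conditions are symmetric (sketch):
+ *
+ *   disable: old->active && (!new->active || needs_modeset)
+ *   enable:  new->active && (!old->active || needs_modeset)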
+ */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + + if (new_crtc_state->active && + (!old_crtc_state->active || + drm_atomic_crtc_needs_modeset(new_crtc_state))) { + manage_dm_interrupts(adev, acrtc, true); +#ifdef CONFIG_DEBUG_FS + /** + * Frontend may have changed so reapply the CRC capture + * settings for the stream. + */ + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + + if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { + amdgpu_dm_crtc_configure_crc_source( + crtc, dm_new_crtc_state, + dm_new_crtc_state->crc_src); + } +#endif + } + } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) if (new_crtc_state->async_flip) @@ -7616,9 +7630,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm, crtc, wait_for_vblank); } - /* Enable interrupts for CRTCs going from 0 to n active planes. */ - amdgpu_dm_enable_crtc_interrupts(dev, state, false); - /* Update audio instances for each connector. */ amdgpu_dm_commit_audio(dev, state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 86c132ddc452..dd1559c743c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -43,6 +43,9 @@ */ #define AMDGPU_DM_MAX_DISPLAY_INDEX 31 + +#define AMDGPU_DM_MAX_CRTC 6 + /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" @@ -330,6 +333,13 @@ struct amdgpu_display_manager { * available in FW */ const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; + + /** + * @mst_encoders: + * + * fake encoders used for DP MST. + */ + struct amdgpu_encoder mst_encoders[AMDGPU_DM_MAX_CRTC]; }; struct amdgpu_dm_connector { @@ -358,7 +368,6 @@ struct amdgpu_dm_connector { struct amdgpu_dm_dp_aux dm_dp_aux; struct drm_dp_mst_port *port; struct amdgpu_dm_connector *mst_port; - struct amdgpu_encoder *mst_encoder; struct drm_dp_aux *dsc_aux; /* TODO see if we can merge with ddc_bus or make a dm_connector */ @@ -405,7 +414,6 @@ struct dm_crtc_state { int update_type; int active_planes; - bool interrupts_enabled; int crc_skip_count; enum amdgpu_dm_pipe_crc_source crc_src; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index db4fab10a0c4..caf3beaf4b7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -46,6 +46,89 @@ struct dmub_debugfs_trace_entry { uint32_t param1; }; + +/* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array + * + * Function takes in attributes passed to debugfs write entry + * and writes into param array. + * The user passes max_param_num to identify maximum number of + * parameters that could be parsed. + * + */ +static int parse_write_buffer_into_params(char *wr_buf, uint32_t wr_buf_size, + long *param, const char __user *buf, + int max_param_num, + uint8_t *param_nums) +{ + char *wr_buf_ptr = NULL; + uint32_t wr_buf_count = 0; + int r; + char *sub_str = NULL; + const char delimiter[3] = {' ', '\n', '\0'}; + uint8_t param_index = 0; + + *param_nums = 0; + + wr_buf_ptr = wr_buf; + + r = copy_from_user(wr_buf_ptr, buf, wr_buf_size); + + /* r is bytes not be copied */ + if (r >= wr_buf_size) { + DRM_DEBUG_DRIVER("user data not be read\n"); + return -EINVAL; + } + + /* check number of parameters. 
isspace could not differ space and \n */ + while ((*wr_buf_ptr != 0xa) && (wr_buf_count < wr_buf_size)) { + /* skip space*/ + while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) { + wr_buf_ptr++; + wr_buf_count++; + } + + if (wr_buf_count == wr_buf_size) + break; + + /* skip non-space*/ + while ((!isspace(*wr_buf_ptr)) && (wr_buf_count < wr_buf_size)) { + wr_buf_ptr++; + wr_buf_count++; + } + + (*param_nums)++; + + if (wr_buf_count == wr_buf_size) + break; + } + + if (*param_nums > max_param_num) + *param_nums = max_param_num; +; + + wr_buf_ptr = wr_buf; /* reset buf pointer */ + wr_buf_count = 0; /* number of char already checked */ + + while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) { + wr_buf_ptr++; + wr_buf_count++; + } + + while (param_index < *param_nums) { + /* after strsep, wr_buf_ptr will be moved to after space */ + sub_str = strsep(&wr_buf_ptr, delimiter); + + r = kstrtol(sub_str, 16, &(param[param_index])); + + if (r) + DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r); + + param_index++; + } + + return 0; +} + /* function description * get/ set DP configuration: lane_count, link_rate, spread_spectrum * @@ -161,15 +244,11 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, struct dc *dc = (struct dc *)link->dc; struct dc_link_settings prefer_link_settings; char *wr_buf = NULL; - char *wr_buf_ptr = NULL; const uint32_t wr_buf_size = 40; - int r; - int bytes_from_user; - char *sub_str; /* 0: lane_count; 1: link_rate */ - uint8_t param_index = 0; + int max_param_num = 2; + uint8_t param_nums = 0; long param[2]; - const char delimiter[3] = {' ', '\n', '\0'}; bool valid_input = false; if (size == 0) @@ -177,35 +256,20 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); if (!wr_buf) - return -EINVAL; - wr_buf_ptr = wr_buf; + return -ENOSPC; - r = copy_from_user(wr_buf_ptr, buf, wr_buf_size); - - /* r is bytes not be copied */ - if (r >= wr_buf_size) { + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + ¶m_nums)) { kfree(wr_buf); - DRM_DEBUG_DRIVER("user data not read\n"); return -EINVAL; } - bytes_from_user = wr_buf_size - r; - - while (isspace(*wr_buf_ptr)) - wr_buf_ptr++; - - while ((*wr_buf_ptr != '\0') && (param_index < 2)) { - - sub_str = strsep(&wr_buf_ptr, delimiter); - - r = kstrtol(sub_str, 16, ¶m[param_index]); - - if (r) - DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r); - - param_index++; - while (isspace(*wr_buf_ptr)) - wr_buf_ptr++; + if (param_nums <= 0) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("user data not be read\n"); + return -EINVAL; } switch (param[0]) { @@ -233,7 +297,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, if (!valid_input) { kfree(wr_buf); DRM_DEBUG_DRIVER("Invalid Input value No HW will be programmed\n"); - return bytes_from_user; + return size; } /* save user force lane_count, link_rate to preferred settings @@ -246,7 +310,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, dc_link_set_preferred_link_settings(dc, &prefer_link_settings, link); kfree(wr_buf); - return bytes_from_user; + return size; } /* function: get current DP PHY settings: voltage swing, pre-emphasis, @@ -337,51 +401,34 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, struct dc_link *link = connector->dc_link; struct dc *dc = (struct dc *)link->dc; char *wr_buf = NULL; - char 
*wr_buf_ptr = NULL; uint32_t wr_buf_size = 40; - int r; - int bytes_from_user; - char *sub_str; - uint8_t param_index = 0; long param[3]; - const char delimiter[3] = {' ', '\n', '\0'}; bool use_prefer_link_setting; struct link_training_settings link_lane_settings; + int max_param_num = 3; + uint8_t param_nums = 0; + int r = 0; + if (size == 0) - return 0; + return -EINVAL; wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); if (!wr_buf) - return 0; - wr_buf_ptr = wr_buf; + return -ENOSPC; - r = copy_from_user(wr_buf_ptr, buf, wr_buf_size); - - /* r is bytes not be copied */ - if (r >= wr_buf_size) { + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + ¶m_nums)) { kfree(wr_buf); - DRM_DEBUG_DRIVER("user data not be read\n"); - return 0; + return -EINVAL; } - bytes_from_user = wr_buf_size - r; - - while (isspace(*wr_buf_ptr)) - wr_buf_ptr++; - - while ((*wr_buf_ptr != '\0') && (param_index < 3)) { - - sub_str = strsep(&wr_buf_ptr, delimiter); - - r = kstrtol(sub_str, 16, ¶m[param_index]); - - if (r) - DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r); - - param_index++; - while (isspace(*wr_buf_ptr)) - wr_buf_ptr++; + if (param_nums <= 0) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("user data not be read\n"); + return -EINVAL; } if ((param[0] > VOLTAGE_SWING_MAX_LEVEL) || @@ -389,7 +436,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, (param[2] > POST_CURSOR2_MAX_LEVEL)) { kfree(wr_buf); DRM_DEBUG_DRIVER("Invalid Input No HW will be programmed\n"); - return bytes_from_user; + return size; } /* get link settings: lane count, link rate */ @@ -429,7 +476,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, dc_link_set_drive_settings(dc, &link_lane_settings, link); kfree(wr_buf); - return bytes_from_user; + return size; } /* function description @@ -496,19 +543,13 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us struct amdgpu_dm_connector *connector = file_inode(f)->i_private; struct dc_link *link = connector->dc_link; char *wr_buf = NULL; - char *wr_buf_ptr = NULL; uint32_t wr_buf_size = 100; - uint32_t wr_buf_count = 0; - int r; - int bytes_from_user; - char *sub_str = NULL; - uint8_t param_index = 0; - uint8_t param_nums = 0; long param[11] = {0x0}; - const char delimiter[3] = {' ', '\n', '\0'}; + int max_param_num = 11; enum dp_test_pattern test_pattern = DP_TEST_PATTERN_UNSUPPORTED; bool disable_hpd = false; bool valid_test_pattern = false; + uint8_t param_nums = 0; /* init with defalut 80bit custom pattern */ uint8_t custom_pattern[10] = { 0x1f, 0x7c, 0xf0, 0xc1, 0x07, @@ -522,70 +563,26 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us int i; if (size == 0) - return 0; + return -EINVAL; wr_buf = kcalloc(wr_buf_size, sizeof(char), GFP_KERNEL); if (!wr_buf) - return 0; - wr_buf_ptr = wr_buf; + return -ENOSPC; - r = copy_from_user(wr_buf_ptr, buf, wr_buf_size); - - /* r is bytes not be copied */ - if (r >= wr_buf_size) { + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, + (long *)param, buf, + max_param_num, + ¶m_nums)) { kfree(wr_buf); - DRM_DEBUG_DRIVER("user data not be read\n"); - return 0; - } - - bytes_from_user = wr_buf_size - r; - - /* check number of parameters. 
isspace could not differ space and \n */ - while ((*wr_buf_ptr != 0xa) && (wr_buf_count < wr_buf_size)) { - /* skip space*/ - while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) { - wr_buf_ptr++; - wr_buf_count++; - } - - if (wr_buf_count == wr_buf_size) - break; - - /* skip non-space*/ - while ((!isspace(*wr_buf_ptr)) && (wr_buf_count < wr_buf_size)) { - wr_buf_ptr++; - wr_buf_count++; - } - - param_nums++; - - if (wr_buf_count == wr_buf_size) - break; + return -EINVAL; } - /* max 11 parameters */ - if (param_nums > 11) - param_nums = 11; - - wr_buf_ptr = wr_buf; /* reset buf pinter */ - wr_buf_count = 0; /* number of char already checked */ - - while (isspace(*wr_buf_ptr) && (wr_buf_count < wr_buf_size)) { - wr_buf_ptr++; - wr_buf_count++; + if (param_nums <= 0) { + kfree(wr_buf); + DRM_DEBUG_DRIVER("user data not be read\n"); + return -EINVAL; } - while (param_index < param_nums) { - /* after strsep, wr_buf_ptr will be moved to after space */ - sub_str = strsep(&wr_buf_ptr, delimiter); - - r = kstrtol(sub_str, 16, ¶m[param_index]); - - if (r) - DRM_DEBUG_DRIVER("string to int convert error code: %d\n", r); - - param_index++; - } test_pattern = param[0]; @@ -618,7 +615,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us if (!valid_test_pattern) { kfree(wr_buf); DRM_DEBUG_DRIVER("Invalid Test Pattern Parameters\n"); - return bytes_from_user; + return size; } if (test_pattern == DP_TEST_PATTERN_80BIT_CUSTOM) { @@ -685,7 +682,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us kfree(wr_buf); - return bytes_from_user; + return size; } /** diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index cf15248739f7..0affd1997fe7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -95,7 +95,6 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_encoder *amdgpu_encoder = aconnector->mst_encoder; if (aconnector->dc_sink) { dc_link_remove_remote_sink(aconnector->dc_link, @@ -105,8 +104,6 @@ dm_dp_mst_connector_destroy(struct drm_connector *connector) kfree(aconnector->edid); - drm_encoder_cleanup(&amdgpu_encoder->base); - kfree(amdgpu_encoder); drm_connector_cleanup(connector); drm_dp_mst_put_port_malloc(aconnector->port); kfree(aconnector); @@ -243,7 +240,11 @@ static struct drm_encoder * dm_mst_atomic_best_encoder(struct drm_connector *connector, struct drm_connector_state *connector_state) { - return &to_amdgpu_dm_connector(connector)->mst_encoder->base; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc); + + return &adev->dm.mst_encoders[acrtc->crtc_id].base; } static int @@ -306,31 +307,27 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; -static struct amdgpu_encoder * -dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector) +void +dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev) { - struct drm_device *dev = connector->base.dev; - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_encoder *amdgpu_encoder; - struct drm_encoder *encoder; - - amdgpu_encoder = kzalloc(sizeof(*amdgpu_encoder), GFP_KERNEL); - if (!amdgpu_encoder) - return NULL; + struct 
drm_device *dev = adev->ddev; + int i; - encoder = &amdgpu_encoder->base; - encoder->possible_crtcs = amdgpu_dm_get_encoder_crtc_mask(adev); + for (i = 0; i < adev->dm.display_indexes_num; i++) { + struct amdgpu_encoder *amdgpu_encoder = &adev->dm.mst_encoders[i]; + struct drm_encoder *encoder = &amdgpu_encoder->base; - drm_encoder_init( - dev, - &amdgpu_encoder->base, - &amdgpu_dm_encoder_funcs, - DRM_MODE_ENCODER_DPMST, - NULL); + encoder->possible_crtcs = amdgpu_dm_get_encoder_crtc_mask(adev); - drm_encoder_helper_add(encoder, &amdgpu_dm_encoder_helper_funcs); + drm_encoder_init( + dev, + &amdgpu_encoder->base, + &amdgpu_dm_encoder_funcs, + DRM_MODE_ENCODER_DPMST, + NULL); - return amdgpu_encoder; + drm_encoder_helper_add(encoder, &amdgpu_dm_encoder_helper_funcs); + } } static struct drm_connector * @@ -343,6 +340,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct amdgpu_device *adev = dev->dev_private; struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; + int i; aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL); if (!aconnector) @@ -369,9 +367,10 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, master->dc_link, master->connector_id); - aconnector->mst_encoder = dm_dp_create_fake_mst_encoder(master); - drm_connector_attach_encoder(&aconnector->base, - &aconnector->mst_encoder->base); + for (i = 0; i < adev->dm.display_indexes_num; i++) { + drm_connector_attach_encoder(&aconnector->base, + &adev->dm.mst_encoders[i].base); + } connector->max_bpc_property = master->base.max_bpc_property; if (connector->max_bpc_property) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index d2c56579a2cc..b38bd68121ce 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -35,6 +35,9 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector, int link_index); +void +dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev); + #if defined(CONFIG_DRM_AMD_DC_DCN) bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, struct dc_state *dc_state); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index f376058b5df6..6a345d43028c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -174,9 +174,6 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p case FAMILY_NV: #if defined(CONFIG_DRM_AMD_DC_DCN3_0) if (ASICREV_IS_SIENNA_CICHLID_P(asic_id.hw_internal_rev)) { - /* TODO: to add SIENNA_CICHLID clk_mgr support, once CLK IP header files are available, - * for now use DCN3AG clk mgr. 
- */ dcn3_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); break; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c index c320b7af7d34..dbc7cde00433 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c @@ -26,6 +26,7 @@ #include "core_types.h" #include "clk_mgr_internal.h" #include "reg_helper.h" +#include <linux/delay.h> #define MAX_INSTANCE 5 #define MAX_SEGMENT 5 @@ -68,10 +69,42 @@ static const struct IP_BASE MP1_BASE = { { { { 0x00016000, 0, 0, 0, 0 } }, #define VBIOSSMC_MSG_SetDispclkFreq 0x4 #define VBIOSSMC_MSG_SetDprefclkFreq 0x5 +#define VBIOSSMC_Status_BUSY 0x0 +#define VBIOSSMC_Result_OK 0x1 +#define VBIOSSMC_Result_Failed 0xFF +#define VBIOSSMC_Result_UnknownCmd 0xFE +#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD +#define VBIOSSMC_Result_CmdRejectedBusy 0xFC + +/* + * Function to be used instead of REG_WAIT macro because the wait ends when + * the register is NOT EQUAL to zero, and because the translation in msg_if.h + * won't work with REG_WAIT. + */ +static uint32_t rv1_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries) +{ + uint32_t res_val = VBIOSSMC_Status_BUSY; + + do { + res_val = REG_READ(MP1_SMN_C2PMSG_91); + if (res_val != VBIOSSMC_Status_BUSY) + break; + + if (delay_us >= 1000) + msleep(delay_us/1000); + else if (delay_us > 0) + udelay(delay_us); + } while (max_retries--); + + return res_val; +} + int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param) { + uint32_t result; + /* First clear response register */ - REG_WRITE(MP1_SMN_C2PMSG_91, 0); + REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); /* Set the parameter register for the SMU message, unit is Mhz */ REG_WRITE(MP1_SMN_C2PMSG_83, param); @@ -79,7 +112,9 @@ int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned /* Trigger the message transaction by writing the message ID */ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id); - REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + result = rv1_smu_wait_for_response(clk_mgr, 10, 1000); + + ASSERT(result == VBIOSSMC_Result_OK); /* Actual dispclk set is returned in the parameter register */ return REG_READ(MP1_SMN_C2PMSG_83); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index c63ec960e116..f2114bc910bf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -184,13 +184,6 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, pp_smu->set_display_count(&pp_smu->pp_smu, display_count); } - if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) { - clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; - if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); - } - - if (dc->debug.force_min_dcfclk_mhz > 0) new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? 
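		/* i.e. dcfclk_khz = max(requested dcfclk, debug floor in kHz) */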
new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); @@ -417,8 +410,6 @@ static bool dcn2_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->phyclk_khz != b->phyclk_khz) - return false; else if (a->dramclk_khz != b->dramclk_khz) return false; else if (a->p_state_change_support != b->p_state_change_support) @@ -427,6 +418,31 @@ static bool dcn2_are_clock_states_equal(struct dc_clocks *a, return true; } +/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */ +static void dcn2_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + unsigned int i, max_phyclk_req = 0; + struct pp_smu_funcs_nv *pp_smu = NULL; + + if (!clk_mgr->pp_smu || !clk_mgr->pp_smu->nv_funcs.set_voltage_by_freq) + return; + + pp_smu = &clk_mgr->pp_smu->nv_funcs; + + clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; + + for (i = 0; i < MAX_PIPES * 2; i++) { + if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req) + max_phyclk_req = clk_mgr->cur_phyclk_req_table[i]; + } + + if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) { + clk_mgr_base->clks.phyclk_khz = max_phyclk_req; + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); + } +} + static struct clk_mgr_funcs dcn2_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn2_update_clocks, @@ -434,6 +450,7 @@ static struct clk_mgr_funcs dcn2_funcs = { .enable_pme_wa = dcn2_enable_pme_wa, .get_clock = dcn2_get_clock, .are_clock_states_equal = dcn2_are_clock_states_equal, + .notify_link_rate_change = dcn2_notify_link_rate_change, }; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 39788a7bd003..c664404a75d4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -136,11 +136,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, } } - if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) { - clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; - rn_vbios_smu_set_phyclk(clk_mgr, clk_mgr_base->clks.phyclk_khz); - } - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; rn_vbios_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz); @@ -496,13 +491,33 @@ static bool rn_are_clock_states_equal(struct dc_clocks *a, } +/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */ +static void rn_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + unsigned int i, max_phyclk_req = 0; + + clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; + + for (i = 0; i < MAX_PIPES * 2; i++) { + if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req) + max_phyclk_req = clk_mgr->cur_phyclk_req_table[i]; + } + + if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) { + clk_mgr_base->clks.phyclk_khz = max_phyclk_req; + rn_vbios_smu_set_phyclk(clk_mgr, clk_mgr_base->clks.phyclk_khz); + } +} + static struct clk_mgr_funcs dcn21_funcs = { 
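	/* phyclk is no longer raised from update_clocks; the new
	 * notify_link_rate_change hook below tracks the max link rate
	 * requested across links instead. */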
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = rn_update_clocks, .init_clocks = rn_init_clocks, .enable_pme_wa = rn_enable_pme_wa, .are_clock_states_equal = rn_are_clock_states_equal, - .notify_wm_ranges = rn_notify_wm_ranges + .notify_wm_ranges = rn_notify_wm_ranges, + .notify_link_rate_change = rn_notify_link_rate_change, }; static struct clk_bw_params rn_bw_params = { @@ -619,6 +634,42 @@ static struct wm_table lpddr4_wm_table = { } }; +static struct wm_table lpddr4_wm_table_with_disabled_ppt = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 8.32, + .sr_enter_plus_exit_time_us = 9.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { @@ -723,6 +774,7 @@ void rn_clk_mgr_construct( struct clk_log_info log_info = {0}; clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr); + clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr); /* SMU Version 55.51.0 and up no longer have an issue * that needs to limit minimum dispclk */ @@ -737,7 +789,11 @@ void rn_clk_mgr_construct( clk_mgr->base.dentist_vco_freq_khz = 3600000; if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) { - rn_bw_params.wm_table = lpddr4_wm_table; + if (clk_mgr->periodic_retraining_disabled) { + rn_bw_params.wm_table = lpddr4_wm_table_with_disabled_ppt; + } else { + rn_bw_params.wm_table = lpddr4_wm_table; + } } else { rn_bw_params.wm_table = ddr4_wm_table; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 6878aedf1d3e..9a374522e963 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -26,6 +26,7 @@ #include "core_types.h" #include "clk_mgr_internal.h" #include "reg_helper.h" +#include <linux/delay.h> #include "renoir_ip_offset.h" @@ -51,12 +52,46 @@ #define VBIOSSMC_MSG_GetFclkFrequency 0xB #define VBIOSSMC_MSG_SetDisplayCount 0xC #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xD -#define VBIOSSMC_MSG_UpdatePmeRestore 0xE +#define VBIOSSMC_MSG_UpdatePmeRestore 0xE +#define VBIOSSMC_MSG_IsPeriodicRetrainingDisabled 0xF + +#define VBIOSSMC_Status_BUSY 0x0 +#define VBIOSSMC_Result_OK 0x1 +#define VBIOSSMC_Result_Failed 0xFF +#define VBIOSSMC_Result_UnknownCmd 0xFE +#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD +#define VBIOSSMC_Result_CmdRejectedBusy 0xFC + +/* + * Function to be used instead of REG_WAIT macro because the wait ends when + * the register is NOT EQUAL to zero, and because the translation in msg_if.h + * won't work with REG_WAIT. 
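+ *
+ * With the delay_us = 10, max_retries = 1000 arguments used by
+ * rn_vbios_smu_send_msg_with_param() below, the poll budget is roughly
+ * 10 ms; the REG_WAIT it replaces polled up to 200000 times (about 2 s).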
+ */ +static uint32_t rn_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries) +{ + uint32_t res_val = VBIOSSMC_Status_BUSY; + + do { + res_val = REG_READ(MP1_SMN_C2PMSG_91); + if (res_val != VBIOSSMC_Status_BUSY) + break; + + if (delay_us >= 1000) + msleep(delay_us/1000); + else if (delay_us > 0) + udelay(delay_us); + } while (max_retries--); + + return res_val; +} + int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param) { + uint32_t result; + /* First clear response register */ - REG_WRITE(MP1_SMN_C2PMSG_91, 0); + REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); /* Set the parameter register for the SMU message, unit is Mhz */ REG_WRITE(MP1_SMN_C2PMSG_83, param); @@ -64,7 +99,9 @@ int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned /* Trigger the message transaction by writing the message ID */ REG_WRITE(MP1_SMN_C2PMSG_67, msg_id); - REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + result = rn_smu_wait_for_response(clk_mgr, 10, 1000); + + ASSERT(result == VBIOSSMC_Result_OK || result == VBIOSSMC_Result_UnknownCmd); /* Actual dispclk set is returned in the parameter register */ return REG_READ(MP1_SMN_C2PMSG_83); @@ -196,3 +233,11 @@ void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr) VBIOSSMC_MSG_UpdatePmeRestore, 0); } + +int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr) +{ + return rn_vbios_smu_send_msg_with_param( + clk_mgr, + VBIOSSMC_MSG_IsPeriodicRetrainingDisabled, + 0); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h index ccc01879c9d4..3e5df27aa96f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h @@ -36,5 +36,6 @@ int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_ void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count); void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable); void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); +int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr); #endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index b27cb52903f5..d94fdc52be37 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -260,11 +260,6 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, if (enter_display_off == safe_to_lower) dcn30_smu_set_num_of_displays(clk_mgr, display_count); - if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) { - clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; - dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); - } - if (dc->debug.force_min_dcfclk_mhz > 0) new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? 
new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); @@ -344,16 +339,12 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, static void dcn3_notify_wm_ranges(struct clk_mgr *clk_mgr_base) { unsigned int i; - long long table_addr; - WatermarksExternal_t *table; struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + WatermarksExternal_t *table = (WatermarksExternal_t *) clk_mgr->wm_range_table; if (!clk_mgr->smu_present) return; - /* need physical address of table to give to PMFW */ - table = (WatermarksExternal_t *) dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t), &table_addr); - if (!table) // should log failure return; @@ -371,11 +362,9 @@ static void dcn3_notify_wm_ranges(struct clk_mgr *clk_mgr_base) table->Watermarks.WatermarkRow[WM_DCEFCLK][i].Flags = clk_mgr->base.bw_params->wm_table.nv_entries[i].pmfw_breakdown.wm_type; } - dcn30_smu_set_dram_addr_high(clk_mgr, table_addr >> 32); - dcn30_smu_set_dram_addr_low(clk_mgr, table_addr & 0xFFFFFFFF); + dcn30_smu_set_dram_addr_high(clk_mgr, clk_mgr->wm_range_table_addr >> 32); + dcn30_smu_set_dram_addr_low(clk_mgr, clk_mgr->wm_range_table_addr & 0xFFFFFFFF); dcn30_smu_transfer_wm_table_dram_2_smu(clk_mgr); - - dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, table); } /* Set min memclk to minimum, either constrained by the current mode or DPM0 */ @@ -437,8 +426,6 @@ static bool dcn3_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->phyclk_khz != b->phyclk_khz) - return false; else if (a->dramclk_khz != b->dramclk_khz) return false; else if (a->p_state_change_support != b->p_state_change_support) @@ -457,6 +444,28 @@ static void dcn3_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn30_smu_set_pme_workaround(clk_mgr); } +/* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */ +static void dcn30_notify_link_rate_change(struct clk_mgr *clk_mgr_base, struct dc_link *link) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + unsigned int i, max_phyclk_req = clk_mgr_base->bw_params->clk_table.entries[0].phyclk_mhz * 1000; + + if (!clk_mgr->smu_present) + return; + + clk_mgr->cur_phyclk_req_table[link->link_index] = link->cur_link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; + + for (i = 0; i < MAX_PIPES * 2; i++) { + if (clk_mgr->cur_phyclk_req_table[i] > max_phyclk_req) + max_phyclk_req = clk_mgr->cur_phyclk_req_table[i]; + } + + if (max_phyclk_req != clk_mgr_base->clks.phyclk_khz) { + clk_mgr_base->clks.phyclk_khz = max_phyclk_req; + dcn30_smu_set_hard_min_by_freq(clk_mgr, PPCLK_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); + } +} + static struct clk_mgr_funcs dcn3_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn3_update_clocks, @@ -466,7 +475,8 @@ static struct clk_mgr_funcs dcn3_funcs = { .set_hard_max_memclk = dcn3_set_hard_max_memclk, .get_memclk_states_from_smu = dcn3_get_memclk_states_from_smu, .are_clock_states_equal = dcn3_are_clock_states_equal, - .enable_pme_wa = dcn3_enable_pme_wa + .enable_pme_wa = dcn3_enable_pme_wa, + .notify_link_rate_change = dcn30_notify_link_rate_change, }; static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr) @@ -534,10 +544,19 @@ void dcn3_clk_mgr_construct( dce_clock_read_ss_info(clk_mgr); clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); + + /* need physical address of table 
to give to PMFW */ + clk_mgr->wm_range_table = dm_helpers_allocate_gpu_mem(clk_mgr->base.ctx, + DC_MEM_ALLOC_TYPE_GART, sizeof(WatermarksExternal_t), + &clk_mgr->wm_range_table_addr); } void dcn3_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr) { if (clk_mgr->base.bw_params) kfree(clk_mgr->base.bw_params); + + if (clk_mgr->wm_range_table) + dm_helpers_free_gpu_mem(clk_mgr->base.ctx, DC_MEM_ALLOC_TYPE_GART, + clk_mgr->wm_range_table); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c index 986c53a3b6a8..7ee3ec5a8af8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c @@ -37,6 +37,13 @@ #define REG(reg_name) \ mm ## reg_name +#include "logger_types.h" +#undef DC_LOGGER +#define DC_LOGGER \ + CTX->logger +#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } + + /* * Function to be used instead of REG_WAIT macro because the wait ends when * the register is NOT EQUAL to zero, and because the translation in msg_if.h @@ -94,6 +101,8 @@ bool dcn30_smu_test_message(struct clk_mgr_internal *clk_mgr, uint32_t input) { uint32_t response = 0; + smu_print("SMU Test message: %d\n", input); + if (dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_TestMessage, input, &response)) if (response == input + 1) @@ -104,9 +113,15 @@ bool dcn30_smu_test_message(struct clk_mgr_internal *clk_mgr, uint32_t input) bool dcn30_smu_get_smu_version(struct clk_mgr_internal *clk_mgr, unsigned int *version) { + smu_print("SMU Get SMU version\n"); + if (dcn30_smu_send_msg_with_param(clk_mgr, - DALSMC_MSG_GetSmuVersion, 0, version)) + DALSMC_MSG_GetSmuVersion, 0, version)) { + + smu_print("SMU version: %d\n", *version); + return true; + } return false; } @@ -116,10 +131,16 @@ bool dcn30_smu_check_driver_if_version(struct clk_mgr_internal *clk_mgr) { uint32_t response = 0; + smu_print("SMU Check driver if version\n"); + if (dcn30_smu_send_msg_with_param(clk_mgr, - DALSMC_MSG_GetDriverIfVersion, 0, &response)) + DALSMC_MSG_GetDriverIfVersion, 0, &response)) { + + smu_print("SMU driver if version: %d\n", response); + if (response == SMU11_DRIVER_IF_VERSION) return true; + } return false; } @@ -129,34 +150,48 @@ bool dcn30_smu_check_msg_header_version(struct clk_mgr_internal *clk_mgr) { uint32_t response = 0; + smu_print("SMU Check msg header version\n"); + if (dcn30_smu_send_msg_with_param(clk_mgr, - DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) + DALSMC_MSG_GetMsgHeaderVersion, 0, &response)) { + + smu_print("SMU msg header version: %d\n", response); + if (response == DALSMC_VERSION) return true; + } return false; } void dcn30_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high) { + smu_print("SMU Set DRAM addr high: %d\n", addr_high); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetDalDramAddrHigh, addr_high, NULL); } void dcn30_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low) { + smu_print("SMU Set DRAM addr low: %d\n", addr_low); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetDalDramAddrLow, addr_low, NULL); } void dcn30_smu_transfer_wm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr) { + smu_print("SMU Transfer WM table SMU 2 DRAM\n"); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_TransferTableSmu2Dram, TABLE_WATERMARKS, NULL); } void dcn30_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr) { + smu_print("SMU Transfer WM 
table DRAM 2 SMU\n"); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS, NULL); } @@ -169,9 +204,13 @@ unsigned int dcn30_smu_set_hard_min_by_freq(struct clk_mgr_internal *clk_mgr, PP /* bits 23:16 for clock type, lower 16 bits for frequency in MHz */ uint32_t param = (clk << 16) | freq_mhz; + smu_print("SMU Set hard min by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetHardMinByFreq, param, &response); + smu_print("SMU Frequency set = %d MHz\n", response); + return response; } @@ -183,9 +222,13 @@ unsigned int dcn30_smu_set_hard_max_by_freq(struct clk_mgr_internal *clk_mgr, PP /* bits 23:16 for clock type, lower 16 bits for frequency in MHz */ uint32_t param = (clk << 16) | freq_mhz; + smu_print("SMU Set hard max by freq: clk = %d, freq_mhz = %d MHz\n", clk, freq_mhz); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetHardMaxByFreq, param, &response); + smu_print("SMU Frequency set = %d MHz\n", response); + return response; } @@ -210,9 +253,13 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P /* bits 23:16 for clock type, lower 8 bits for DPM level */ uint32_t param = (clk << 16) | dpm_level; + smu_print("SMU Get dpm freq by index: clk = %d, dpm_level = %d\n", clk, dpm_level); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetDpmFreqByIndex, param, &response); + smu_print("SMU dpm freq: %d MHz\n", response); + return response; } @@ -224,32 +271,44 @@ unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr /* bits 23:16 for clock type */ uint32_t param = clk << 16; + smu_print("SMU Get DC mode max DPM freq: clk = %d\n", clk); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetDcModeMaxDpmFreq, param, &response); + smu_print("SMU DC mode max DPM freq: %d MHz\n", response); + return response; } void dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz) { + smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetMinDeepSleepDcefclk, freq_mhz, NULL); } void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays) { + smu_print("SMU Set num of displays: num_displays = %d\n", num_displays); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_NumOfDisplays, num_displays, NULL); } void dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable) { + smu_print("SMU Set external client df cstate allow: enable = %d\n", enable); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_SetExternalClientDfCstateAllow, enable ?
1 : 0, NULL); } void dcn30_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr) { + smu_print("SMU Set PME workaround\n"); + dcn30_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_BacoAudioD3PME, 0, NULL); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 67402d75e67e..ef0b5941bc50 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2324,7 +2324,7 @@ static void commit_planes_for_stream(struct dc *dc, if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { - if (stream && should_use_dmub_lock(stream->link)) { + if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; @@ -2607,10 +2607,12 @@ void dc_commit_updates_for_stream(struct dc *dc, copy_stream_update_to_stream(dc, context, stream, stream_update); - if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { - DC_ERROR("Mode validation failed for stream update!\n"); - dc_release_state(context); - return; + if (update_type > UPDATE_TYPE_FAST) { + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + DC_ERROR("Mode validation failed for stream update!\n"); + dc_release_state(context); + return; + } } commit_planes_for_stream( @@ -2681,6 +2683,12 @@ void dc_interrupt_ack(struct dc *dc, enum dc_irq_source src) dal_irq_service_ack(dc->res_pool->irqs, src); } +void dc_power_down_on_boot(struct dc *dc) +{ + if (dc->ctx->dce_environment != DCE_ENV_VIRTUAL_HW && + dc->hwss.power_down_on_boot) + dc->hwss.power_down_on_boot(dc); +} void dc_set_power_state( struct dc *dc, @@ -2693,6 +2701,9 @@ void dc_set_power_state( case DC_ACPI_CM_POWER_STATE_D0: dc_resource_state_construct(dc, dc->current_state); + if (dc->ctx->dmub_srv) + dc_dmub_srv_wait_phy_init(dc->ctx->dmub_srv); + dc->hwss.init_hw(dc); if (dc->hwss.init_sys_ctx != NULL && diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 3d969f83b3cc..1f94591ce5fb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3097,62 +3097,63 @@ static void get_active_converter_info( uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/ union dwnstream_port_caps_byte0 *port_caps = (union dwnstream_port_caps_byte0 *)det_caps; - core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0, - det_caps, sizeof(det_caps)); + if (core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0, + det_caps, sizeof(det_caps)) == DC_OK) { - switch (port_caps->bits.DWN_STRM_PORTX_TYPE) { - /*Handle DP case as DONGLE_NONE*/ - case DOWN_STREAM_DETAILED_DP: - link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE; - break; - case DOWN_STREAM_DETAILED_VGA: - link->dpcd_caps.dongle_type = - DISPLAY_DONGLE_DP_VGA_CONVERTER; - break; - case DOWN_STREAM_DETAILED_DVI: - link->dpcd_caps.dongle_type = - DISPLAY_DONGLE_DP_DVI_CONVERTER; - break; - case DOWN_STREAM_DETAILED_HDMI: - case DOWN_STREAM_DETAILED_DP_PLUS_PLUS: - /*Handle DP++ active converter case, process DP++ case as HDMI case according DP1.4 spec*/ - link->dpcd_caps.dongle_type = - DISPLAY_DONGLE_DP_HDMI_CONVERTER; - - link->dpcd_caps.dongle_caps.dongle_type = link->dpcd_caps.dongle_type; - if (ds_port.fields.DETAILED_CAPS) { - - union dwnstream_port_caps_byte3_hdmi - hdmi_caps = {.raw = det_caps[3] }; - union 
dwnstream_port_caps_byte2 - hdmi_color_caps = {.raw = det_caps[2] }; - link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz = - det_caps[1] * 2500; - - link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter = - hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK; - /*YCBCR capability only for HDMI case*/ - if (port_caps->bits.DWN_STRM_PORTX_TYPE - == DOWN_STREAM_DETAILED_HDMI) { - link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through = - hdmi_caps.bits.YCrCr422_PASS_THROUGH; - link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through = - hdmi_caps.bits.YCrCr420_PASS_THROUGH; - link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter = - hdmi_caps.bits.YCrCr422_CONVERSION; - link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter = - hdmi_caps.bits.YCrCr420_CONVERSION; + switch (port_caps->bits.DWN_STRM_PORTX_TYPE) { + /*Handle DP case as DONGLE_NONE*/ + case DOWN_STREAM_DETAILED_DP: + link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE; + break; + case DOWN_STREAM_DETAILED_VGA: + link->dpcd_caps.dongle_type = + DISPLAY_DONGLE_DP_VGA_CONVERTER; + break; + case DOWN_STREAM_DETAILED_DVI: + link->dpcd_caps.dongle_type = + DISPLAY_DONGLE_DP_DVI_CONVERTER; + break; + case DOWN_STREAM_DETAILED_HDMI: + case DOWN_STREAM_DETAILED_DP_PLUS_PLUS: + /*Handle DP++ active converter case, process DP++ case as HDMI case according DP1.4 spec*/ + link->dpcd_caps.dongle_type = + DISPLAY_DONGLE_DP_HDMI_CONVERTER; + + link->dpcd_caps.dongle_caps.dongle_type = link->dpcd_caps.dongle_type; + if (ds_port.fields.DETAILED_CAPS) { + + union dwnstream_port_caps_byte3_hdmi + hdmi_caps = {.raw = det_caps[3] }; + union dwnstream_port_caps_byte2 + hdmi_color_caps = {.raw = det_caps[2] }; + link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz = + det_caps[1] * 2500; + + link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter = + hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK; + /*YCBCR capability only for HDMI case*/ + if (port_caps->bits.DWN_STRM_PORTX_TYPE + == DOWN_STREAM_DETAILED_HDMI) { + link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through = + hdmi_caps.bits.YCrCr422_PASS_THROUGH; + link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through = + hdmi_caps.bits.YCrCr420_PASS_THROUGH; + link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter = + hdmi_caps.bits.YCrCr422_CONVERSION; + link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter = + hdmi_caps.bits.YCrCr420_CONVERSION; + } + + link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc = + translate_dpcd_max_bpc( + hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); + + if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0) + link->dpcd_caps.dongle_caps.extendedCapValid = true; } - link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc = - translate_dpcd_max_bpc( - hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); - - if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk_in_khz != 0) - link->dpcd_caps.dongle_caps.extendedCapValid = true; + break; } - - break; } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 1b3474aa380d..dd88eb348dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -14,6 +14,7 @@ #include "dpcd_defs.h" #include "dsc.h" #include "resource.h" +#include "clk_mgr.h" static uint8_t convert_to_count(uint8_t lttpr_repeater_count) { @@ -123,6 +124,11 @@ void dp_enable_link_phy( } } + link->cur_link_settings = *link_settings; + + if (dc->clk_mgr->funcs->notify_link_rate_change) + 
dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); + if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->lock_phy(dmcu); @@ -141,8 +147,6 @@ void dp_enable_link_phy( if (dmcu != NULL && dmcu->funcs->unlock_phy) dmcu->funcs->unlock_phy(dmcu); - link->cur_link_settings = *link_settings; - dp_receiver_power_ctrl(link, true); } @@ -151,7 +155,8 @@ bool edp_receiver_ready_T9(struct dc_link *link) unsigned int tries = 0; unsigned char sinkstatus = 0; unsigned char edpRev = 0; - enum dc_status result = DC_OK; + enum dc_status result; + result = core_link_read_dpcd(link, DP_EDP_DPCD_REV, &edpRev, sizeof(edpRev)); /* starting from eDP version 1.2, SINK_STATUS indicates the sink is ready. */ @@ -177,7 +182,7 @@ bool edp_receiver_ready_T7(struct dc_link *link) { unsigned char sinkstatus = 0; unsigned char edpRev = 0; - enum dc_status result = DC_OK; + enum dc_status result; /* use absolute time stamp to constrain max T7 */ unsigned long long enter_timestamp = 0; @@ -233,6 +238,9 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) /* Clear current link setting. */ memset(&link->cur_link_settings, 0, sizeof(link->cur_link_settings)); + + if (dc->clk_mgr->funcs->notify_link_rate_change) + dc->clk_mgr->funcs->notify_link_rate_change(dc->clk_mgr, link); } void dp_disable_link_phy_mst(struct dc_link *link, enum signal_type signal) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 1000dc6daf72..7b5f90ebb133 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1162,19 +1162,32 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) /* May need to re-check lb size after this in some obscure scenario */ calculate_inits_and_adj_vp(pipe_ctx); - DC_LOG_SCALER( - "%s: Viewport:\nheight:%d width:%d x:%d " - "y:%d\n dst_rect:\nheight:%d width:%d x:%d " - "y:%d\n", - __func__, - pipe_ctx->plane_res.scl_data.viewport.height, - pipe_ctx->plane_res.scl_data.viewport.width, - pipe_ctx->plane_res.scl_data.viewport.x, - pipe_ctx->plane_res.scl_data.viewport.y, - plane_state->dst_rect.height, - plane_state->dst_rect.width, - plane_state->dst_rect.x, - plane_state->dst_rect.y); + DC_LOG_SCALER("%s pipe %d:\nViewport: height:%d width:%d x:%d y:%d Recout: height:%d width:%d x:%d y:%d HACTIVE:%d VACTIVE:%d\n" + "src_rect: height:%d width:%d x:%d y:%d dst_rect: height:%d width:%d x:%d y:%d clip_rect: height:%d width:%d x:%d y:%d\n", + __func__, + pipe_ctx->pipe_idx, + pipe_ctx->plane_res.scl_data.viewport.height, + pipe_ctx->plane_res.scl_data.viewport.width, + pipe_ctx->plane_res.scl_data.viewport.x, + pipe_ctx->plane_res.scl_data.viewport.y, + pipe_ctx->plane_res.scl_data.recout.height, + pipe_ctx->plane_res.scl_data.recout.width, + pipe_ctx->plane_res.scl_data.recout.x, + pipe_ctx->plane_res.scl_data.recout.y, + pipe_ctx->plane_res.scl_data.h_active, + pipe_ctx->plane_res.scl_data.v_active, + plane_state->src_rect.height, + plane_state->src_rect.width, + plane_state->src_rect.x, + plane_state->src_rect.y, + plane_state->dst_rect.height, + plane_state->dst_rect.width, + plane_state->dst_rect.x, + plane_state->dst_rect.y, + plane_state->clip_rect.height, + plane_state->clip_rect.width, + plane_state->clip_rect.x, + plane_state->clip_rect.y); if (store_h_border_left) restore_border_left_from_dst(pipe_ctx, store_h_border_left); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index
3b897372ed27..d6989d115c5c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -56,7 +56,7 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) } } -static void dc_stream_construct(struct dc_stream_state *stream, +static bool dc_stream_construct(struct dc_stream_state *stream, struct dc_sink *dc_sink_data) { uint32_t i = 0; @@ -118,11 +118,17 @@ static void dc_stream_construct(struct dc_stream_state *stream, update_stream_signal(stream, dc_sink_data); stream->out_transfer_func = dc_create_transfer_func(); + if (stream->out_transfer_func == NULL) { + dc_sink_release(dc_sink_data); + return false; + } stream->out_transfer_func->type = TF_TYPE_BYPASS; stream->out_transfer_func->ctx = stream->ctx; stream->stream_id = stream->ctx->dc_stream_id_count; stream->ctx->dc_stream_id_count++; + + return true; } static void dc_stream_destruct(struct dc_stream_state *stream) @@ -164,13 +170,20 @@ struct dc_stream_state *dc_create_stream_for_sink( stream = kzalloc(sizeof(struct dc_stream_state), GFP_KERNEL); if (stream == NULL) - return NULL; + goto alloc_fail; - dc_stream_construct(stream, sink); + if (dc_stream_construct(stream, sink) == false) + goto construct_fail; kref_init(&stream->refcount); return stream; + +construct_fail: + kfree(stream); + +alloc_fail: + return NULL; } struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index f7cb1354a635..e5a1a9eb6217 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.91" +#define DC_VER "3.2.94" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -466,6 +466,7 @@ struct dc_debug_options { bool scl_reset_length10; bool hdmi20_disable; bool skip_detection_link_training; + bool edid_read_retry_times; bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst #if defined(CONFIG_DRM_AMD_DC_DCN3_0) @@ -1028,6 +1029,7 @@ bool dc_resource_is_dsc_encoding_supported(const struct dc *dc); */ bool dc_commit_state(struct dc *dc, struct dc_state *context); +void dc_power_down_on_boot(struct dc *dc); struct dc_state *dc_create_state(struct dc *dc); struct dc_state *dc_copy_state(struct dc_state *src_ctx); @@ -1228,6 +1230,8 @@ void dc_set_power_state( enum dc_acpi_cm_power_state power_state); void dc_resume(struct dc *dc); +void dc_power_down_on_boot(struct dc *dc); + #if defined(CONFIG_DRM_AMD_DC_HDCP) /* * HDCP Interfaces diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 96532f7ba480..eea2429ac67d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -106,17 +106,29 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); } -bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) +void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub; - union dmub_fw_boot_status status; + struct dmub_srv *dmub = dc_dmub_srv->dmub; + struct dc_context *dc_ctx = dc_dmub_srv->ctx; + enum dmub_status status; - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; + for (;;) { + /* Wait up to a second for PHY init. 
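+ * Each attempt passes a 1000000 us timeout to dmub_srv_wait_for_phy_init(); only DMUB_STATUS_TIMEOUT re-enters the loop, any other failure breaks out since retrying would never succeed.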
*/ + status = dmub_srv_wait_for_phy_init(dmub, 1000000); + if (status == DMUB_STATUS_OK) + /* Initialization OK */ + break; - dmub = dc_dmub_srv->dmub; + DC_ERROR("DMCUB PHY init failed: status=%d\n", status); + ASSERT(0); - status = dmub->hw_funcs.get_fw_status(dmub); + if (status != DMUB_STATUS_TIMEOUT) + /* + * Server likely initialized or we don't have + * DMCUB HW support - this won't end. + */ + break; - return status.bits.optimized_init_done; + /* Continue spinning so we don't hang the ASIC. */ + } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 8bd20d0d7689..a3a09ccb6d26 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -56,6 +56,4 @@ void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); void dc_dmub_srv_wait_phy_init(struct dc_dmub_srv *dc_dmub_srv); -bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); - #endif /* _DMUB_DC_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index d64241433548..29fe5389f973 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -177,6 +177,7 @@ enum dc_edid_status { EDID_NO_RESPONSE, EDID_BAD_CHECKSUM, EDID_THE_SAME, + EDID_FALL_BACK, }; enum act_return_status { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 916d305d3022..82e67bd81f2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -234,6 +234,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ? 
true : false; copy_settings_data->init_sdp_deadline = psr_context->sdpTransmitLineNumDeadline; + copy_settings_data->debug.bitfields.use_hw_lock_mgr = 0; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index cb45f05a0319..da0897fe3b54 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1255,6 +1255,7 @@ void dcn10_init_hw(struct dc *dc) struct dc_bios *dcb = dc->ctx->dc_bios; struct resource_pool *res_pool = dc->res_pool; uint32_t backlight = MAX_BACKLIGHT_LEVEL; + bool is_optimized_init_done = false; if (dc->clk_mgr && dc->clk_mgr->funcs->init_clocks) dc->clk_mgr->funcs->init_clocks(dc->clk_mgr); @@ -1288,8 +1289,7 @@ void dcn10_init_hw(struct dc *dc) if (!dcb->funcs->is_accelerated_mode(dcb)) hws->funcs.disable_vga(dc->hwseq); - if (!dc_dmub_srv_optimized_init_done(dc->ctx->dmub_srv)) - hws->funcs.bios_golden_init(dc); + hws->funcs.bios_golden_init(dc); if (dc->ctx->dc_bios->fw_info_valid) { res_pool->ref_clocks.xtalin_clock_inKhz = @@ -1323,7 +1323,8 @@ void dcn10_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; - link->link_enc->funcs->hw_init(link->link_enc); + if (!is_optimized_init_done) + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && @@ -1332,9 +1333,11 @@ void dcn10_init_hw(struct dc *dc) } /* Power gate DSCs */ - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); + if (!is_optimized_init_done) { + for (i = 0; i < res_pool->res_cap->num_dsc; i++) + if (hws->funcs.dsc_pg_control != NULL) + hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); + } /* we want to turn off all dp displays before doing detection */ if (dc->config.power_down_display_on_boot) { @@ -1379,68 +1382,42 @@ void dcn10_init_hw(struct dc *dc) * everything down. */ if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) { - hws->funcs.init_pipes(dc, dc->current_state); - if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) - dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, - !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); + if (!is_optimized_init_done) { + hws->funcs.init_pipes(dc, dc->current_state); + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); + } } - /* In headless boot cases, DIG may be turned - * on which causes HW/SW discrepancies. 
- * To avoid this, power down hardware on boot - * if DIG is turned on and seamless boot not enabled - */ - if (dc->config.power_down_display_on_boot) { - struct dc_link *edp_link = get_edp_link(dc); + if (!is_optimized_init_done) { - if (edp_link && - edp_link->link_enc->funcs->is_dig_enabled && - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && - dc->hwseq->funcs.edp_backlight_control && - dc->hwss.power_down && - dc->hwss.edp_power_control) { - dc->hwseq->funcs.edp_backlight_control(edp_link, false); - dc->hwss.power_down(dc); - dc->hwss.edp_power_control(edp_link, false); - } else { - for (i = 0; i < dc->link_count; i++) { - struct dc_link *link = dc->links[i]; + for (i = 0; i < res_pool->audio_count; i++) { + struct audio *audio = res_pool->audios[i]; - if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc) && - dc->hwss.power_down) { - dc->hwss.power_down(dc); - break; - } - - } + audio->funcs->hw_init(audio); } - } - for (i = 0; i < res_pool->audio_count; i++) { - struct audio *audio = res_pool->audios[i]; + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; - audio->funcs->hw_init(audio); - } + if (link->panel_cntl) + backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + } - for (i = 0; i < dc->link_count; i++) { - struct dc_link *link = dc->links[i]; + if (abm != NULL) + abm->funcs->abm_init(abm, backlight); - if (link->panel_cntl) - backlight = link->panel_cntl->funcs->hw_init(link->panel_cntl); + if (dmcu != NULL && !dmcu->auto_load_dmcu) + dmcu->funcs->dmcu_init(dmcu); } - if (abm != NULL) - abm->funcs->abm_init(abm, backlight); - - if (dmcu != NULL && !dmcu->auto_load_dmcu) - dmcu->funcs->dmcu_init(dmcu); - if (abm != NULL && dmcu != NULL) abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu); /* power AFMT HDMI memory TODO: may move to dis/en output save power*/ - REG_WRITE(DIO_MEM_PWR_CTRL, 0); + if (!is_optimized_init_done) + REG_WRITE(DIO_MEM_PWR_CTRL, 0); if (!dc->debug.disable_clock_gate) { /* enable all DCN clock gating */ @@ -1463,6 +1440,43 @@ void dcn10_init_hw(struct dc *dc) } +/* In headless boot cases, DIG may be turned + * on which causes HW/SW discrepancies. 
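+ * (for example, the VBIOS may have left a DIG encoder enabled that the driver's software state does not track)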
+ * To avoid this, power down hardware on boot + * if DIG is turned on and seamless boot is not enabled + */ +void dcn10_power_down_on_boot(struct dc *dc) +{ + int i = 0; + + if (dc->config.power_down_display_on_boot) { + struct dc_link *edp_link = get_edp_link(dc); + + if (edp_link && + edp_link->link_enc->funcs->is_dig_enabled && + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc) && + dc->hwseq->funcs.edp_backlight_control && + dc->hwss.power_down && + dc->hwss.edp_power_control) { + dc->hwseq->funcs.edp_backlight_control(edp_link, false); + dc->hwss.power_down(dc); + dc->hwss.edp_power_control(edp_link, false); + } else { + for (i = 0; i < dc->link_count; i++) { + struct dc_link *link = dc->links[i]; + + if (link->link_enc->funcs->is_dig_enabled && + link->link_enc->funcs->is_dig_enabled(link->link_enc) && + dc->hwss.power_down) { + dc->hwss.power_down(dc); + break; + } + + } + } + } +} + void dcn10_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context) @@ -2445,14 +2459,46 @@ static void dcn10_update_dchubp_dpp( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct plane_size size = plane_state->plane_size; unsigned int compat_level = 0; + bool should_divided_by_2 = false; /* depends on DML calculation, DPP clock value may change dynamically */ /* If the requested max dpp clk is lower than current dispclk, no need to * divide by 2 */ if (plane_state->update_flags.bits.full_update) { - bool should_divided_by_2 = context->bw_ctx.bw.dcn.clk.dppclk_khz <= - dc->clk_mgr->clks.dispclk_khz / 2; + + /* the newly calculated dispclk and dppclk are stored in + * context->bw_ctx.bw.dcn.clk.dispclk_khz / dppclk_khz, while the + * current dispclk and dppclk come from dc->clk_mgr->clks.dispclk_khz. + * dcn_validate_bandwidth computes the new dispclk and dppclk. + * dispclk is put in use after optimize_bandwidth, when + * ramp_up_dispclk_with_dpp is called. + * there are two places where dppclk is put in use. One location + * is the same as the location for dispclk. The other is within + * update_dchubp_dpp, which happens between pre_bandwidth and + * optimize_bandwidth. + * a dppclk update within update_dchubp_dpp therefore causes the new + * dispclk and dppclk values not to be in use at the same time. when + * clocks are decreased, this may leave dppclk lower than the previous + * configuration requires and get the pipe stuck. + * for example, with eDP + external DP, change the resolution of DP + * from 1920x1080x144hz to 1280x960x60hz. + * before the change: dispclk = 337889, dppclk = 337889 + * after the mode change, dcn_validate_bandwidth calculates + * dispclk = 143122, dppclk = 143122 + * update_dchubp_dpp is executed before dispclk is updated, so + * dispclk = 337889, but dppclk uses the new value dispclk / 2 = + * 168944. this causes a pipe pstate warning issue.
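+ * (168944 kHz is half of the old dispclk, 337889 / 2, rather than the 143122 kHz the new configuration actually computed.)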
+ * solution: between pre_bandwidth and optimize_bandwidth, while + * dispclk is going to be decreased, keep dppclk = dispclk + **/ + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < + dc->clk_mgr->clks.dispclk_khz) + should_divided_by_2 = false; + else + should_divided_by_2 = + context->bw_ctx.bw.dcn.clk.dppclk_khz <= + dc->clk_mgr->clks.dispclk_khz / 2; dpp->funcs->dpp_dppclk_control( dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 42b6e016d71e..6d891166da8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -109,6 +109,7 @@ void dcn10_program_pipe( void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx); void dcn10_init_hw(struct dc *dc); void dcn10_init_pipes(struct dc *dc, struct dc_state *context); +void dcn10_power_down_on_boot(struct dc *dc); enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index f6a790c49321..5c98b71c1d47 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -30,6 +30,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dcn10_apply_ctx_for_surface, .post_unlock_program_front_end = dcn10_post_unlock_program_front_end, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 5621c95177d2..7725a406c16e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1482,22 +1482,23 @@ static void dcn20_update_dchubp_dpp( memset(&adjust, 0, sizeof(adjust)); adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - /* save the enablement of gamut remap for dpp*/ + + /* save the enablement of gamut remap for dpp */ enable_remap_dpp = pipe_ctx->stream->gamut_remap_matrix.enable_remap; - /*force bypass gamut remap for dpp/cm*/ + + /* force bypass gamut remap for dpp/cm */ pipe_ctx->stream->gamut_remap_matrix.enable_remap = false; dc->hwss.program_gamut_remap(pipe_ctx); - /*restore gamut remap flag for the top plane and use this remap into mpc*/ - if (pipe_ctx->top_pipe == NULL) - pipe_ctx->stream->gamut_remap_matrix.enable_remap = enable_remap_dpp; - else - pipe_ctx->stream->gamut_remap_matrix.enable_remap = false; - - if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) - adjust.temperature_matrix[i] = - pipe_ctx->stream->gamut_remap_matrix.matrix[i]; + + /* restore gamut remap flag and use this remap into mpc */ + pipe_ctx->stream->gamut_remap_matrix.enable_remap = enable_remap_dpp; + + /* build remap matrix for top plane if enabled */ + if (enable_remap_dpp && pipe_ctx->top_pipe == NULL) { + adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; + for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) + adjust.temperature_matrix[i] = + pipe_ctx->stream->gamut_remap_matrix.matrix[i]; } mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust); } else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c 
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index bb9e9bec2f28..2380392b916e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -30,6 +30,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = NULL, .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 0983bcc25117..e226647088b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2102,11 +2102,20 @@ int dcn20_populate_dml_pipes_from_context( if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == res_ctx->pipe_ctx[i].plane_state) { struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].top_pipe; + int split_idx = 0; while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state - == res_ctx->pipe_ctx[i].plane_state) + == res_ctx->pipe_ctx[i].plane_state) { first_pipe = first_pipe->top_pipe; - pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx; + split_idx++; + } + /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ + if (split_idx == 0) + pipes[pipe_cnt].pipe.src.hsplit_grp = first_pipe->pipe_idx; + else if (split_idx == 1) + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].pipe_idx; + else if (split_idx == 2) + pipes[pipe_cnt].pipe.src.hsplit_grp = res_ctx->pipe_ctx[i].top_pipe->pipe_idx; } else if (res_ctx->pipe_ctx[i].prev_odm_pipe) { struct pipe_ctx *first_pipe = res_ctx->pipe_ctx[i].prev_odm_pipe; @@ -2258,7 +2267,7 @@ int dcn20_populate_dml_pipes_from_context( || (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state == pln) || pipes[pipe_cnt].pipe.dest.odm_combine != dm_odm_combine_mode_disabled; - /* stereo is never split, nor odm combine */ + /* stereo is not split */ if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { pipes[pipe_cnt].pipe.src.is_hsplit = false; @@ -2721,12 +2730,11 @@ int dcn20_validate_apply_pipe_split_flags( if (!context->res_ctx.pipe_ctx[i].stream) continue; - if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] > 1) { - if (split4mpc) - split[i] = 4; - else + if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) + split[i] = 4; + else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) split[i] = 2; - } + if ((pipe->stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE || pipe->stream->view_format == diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 8575de1a8ad2..177d0dc8927a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -31,6 +31,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .program_gamut_remap = dcn10_program_gamut_remap, .init_hw = dcn10_init_hw, + .power_down_on_boot = dcn10_power_down_on_boot, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = NULL, .program_front_end_for_ctx = dcn20_program_front_end_for_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h index 
8db6d76a1131..0b1755f1dea8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.h @@ -248,6 +248,7 @@ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_ENABLE, mask_sh),\ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE_REFERENCE, mask_sh),\ SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DOLBY_VISION_EN, mask_sh),\ SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_COMBINE, mask_sh),\ SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh),\ SE_SF(DIG0_DIG_CLOCK_PATTERN, DIG_CLOCK_PATTERN, mask_sh) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h index d4106dd5a9b0..33f13c1e7520 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h @@ -98,6 +98,7 @@ SRI(OTG_GSL_WINDOW_Y, OTG, inst),\ SRI(OTG_VUPDATE_KEEPOUT, OTG, inst),\ SRI(OTG_DSC_START_POSITION, OTG, inst),\ + SRI(OTG_CRC_CNTL2, OTG, inst),\ SRI(OTG_DRR_TRIGGER_WINDOW, OTG, inst),\ SRI(OTG_DRR_V_TOTAL_CHANGE, OTG, inst),\ SRI(OPTC_DATA_FORMAT_CONTROL, ODM, inst),\ @@ -248,6 +249,10 @@ SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_NUM_OF_INPUT_SEGMENT, mask_sh),\ @@ -280,6 +285,10 @@ SF(OTG0_OTG_GSL_CONTROL, OTG_MASTER_UPDATE_LOCK_GSL_EN, mask_sh), \ SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_X, mask_sh), \ SF(OTG0_OTG_DSC_START_POSITION, OTG_DSC_START_POSITION_LINE_NUM, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DSC_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_COMBINE_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_STREAM_SPLIT_MODE, mask_sh),\ + SF(OTG0_OTG_CRC_CNTL2, OTG_CRC_DATA_FORMAT, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG0_SRC_SEL, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG1_SRC_SEL, mask_sh),\ SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SEG2_SRC_SEL, mask_sh),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index d7ba895de765..653a571e366d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1870,12 +1870,14 @@ static bool dcn30_split_stream_for_mpc_or_odm( return true; } -static bool dcn30_fast_validate_bw( + +static bool dcn30_internal_validate_bw( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *pipe_cnt_out, - int *vlevel_out) + int *vlevel_out, + bool fast_validate) { bool out = false; bool repopulate_pipes = false; @@ -1898,7 +1900,38 @@ static bool dcn30_fast_validate_bw( dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (!fast_validate) { + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over 
voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh_and_mclk_switch; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + /* This may adjust vlevel and maxMpcComb */ + if (vlevel < context->bw_ctx.dml.soc.num_states) + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. + * + * We don't actually support prefetch mode 2, so require that we + * at least support prefetch mode 1. + */ + context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = + dm_allow_self_refresh; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } dml_log_mode_support_params(&context->bw_ctx.dml); @@ -1938,8 +1971,6 @@ static bool dcn30_fast_validate_bw( pipe_idx++; } - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - /* merge pipes if necessary */ for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -2187,7 +2218,8 @@ static void dcn30_calculate_wm( } } -bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, +bool dcn30_validate_bandwidth(struct dc *dc, + struct dc_state *context, bool fast_validate) { bool out = false; @@ -2201,7 +2233,7 @@ bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, BW_VAL_TRACE_COUNT(); - out = dcn30_fast_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel); + out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); if (pipe_cnt == 0) goto validate_out; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 75dc4fe41731..b54814f11b74 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -2567,7 +2567,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman &v->VRatioPrefetchC[k], &v->RequiredPrefetchPixDataBWLuma[k], &v->RequiredPrefetchPixDataBWChroma[k], - &v->NotEnoughTimeForDynamicMetadata, + &v->NotEnoughTimeForDynamicMetadata[k], &v->Tno_bw[k], &v->prefetch_vmrow_bw[k], &v->Tdmdl_vm[k], @@ -2686,7 +2686,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; - if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding == 0 && v->NotEnoughUrgentLatencyHidingPre == 0 && v->NotEnoughTimeForDynamicMetadata == 0 && !VRatioPrefetchMoreThan4 + if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding == 0 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) v->PrefetchModeSupported = true; else { @@ -2695,8 +2695,6 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW); dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not"); dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); - dml_print("DML: Not enough lines for dynamic meta is %s\n", (v->NotEnoughTimeForDynamicMetadata) ? "true" : "false"); - } if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { @@ -2786,7 +2784,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } for (k = 0; k < v->NumberOfActivePlanes; ++k) { - if (v->ErrorResult[k]) { + if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) { v->PrefetchModeSupported = false; dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 0fac7f754604..6ab74640c0da 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -24,6 +24,7 @@ */ #include "dc_features.h" +#include "display_mode_enums.h" #ifndef __DISPLAY_MODE_STRUCTS_H__ #define __DISPLAY_MODE_STRUCTS_H__ @@ -120,6 +121,7 @@ struct _vcs_dpi_soc_bounding_box_st { double urgent_latency_adjustment_fabric_clock_reference_mhz; bool disable_dram_clock_change_vactive_support; bool allow_dram_clock_one_display_vactive; + enum self_refresh_affinity allow_dram_self_refresh_or_dram_clock_change_in_vblank; }; struct _vcs_dpi_ip_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index f615815c73bd..756d8eb1221c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -802,7 +802,7 @@ struct vba_vars_st { unsigned int DCCCMaxCompressedBlock[DC__NUM_DPP__MAX]; unsigned int DCCCIndependent64ByteBlock[DC__NUM_DPP__MAX]; double VStartupMargin; - bool NotEnoughTimeForDynamicMetadata; + bool NotEnoughTimeForDynamicMetadata[DC__NUM_DPP__MAX]; /* Missing from VBA */ unsigned int MaximumMaxVStartupLines; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 505357597603..5994d2a33c40 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -241,6 +241,9 @@ struct clk_mgr_funcs { bool (*are_clock_states_equal) (struct dc_clocks *a, struct dc_clocks *b); void (*notify_wm_ranges)(struct clk_mgr *clk_mgr); + + /* Notify clk_mgr of a change in link rate, update phyclk frequency if necessary */ + void (*notify_link_rate_change)(struct clk_mgr *clk_mgr, struct dc_link *link); #ifdef CONFIG_DRM_AMD_DC_DCN3_0 /* * Send message to PMFW to set hard min memclk frequency diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index c3c151be7d03..4e6e18bbef5d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -270,9 +270,14 @@ struct clk_mgr_internal { enum dm_pp_clocks_state max_clks_state; enum dm_pp_clocks_state cur_min_clks_state; + bool periodic_retraining_disabled; + + unsigned 
int cur_phyclk_req_table[MAX_PIPES * 2]; #ifdef CONFIG_DRM_AMD_DC_DCN3_0 bool smu_present; + void *wm_range_table; + long long wm_range_table_addr; #endif }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 066a2a723c12..720ce5e458d8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -56,6 +56,7 @@ struct hw_sequencer_funcs { /* Pipe Programming Related */ void (*init_hw)(struct dc *dc); + void (*power_down_on_boot)(struct dc *dc); void (*enable_accelerated_mode)(struct dc *dc, struct dc_state *context); enum dc_status (*apply_ctx_to_hw)(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index dd4d7a1dc3b6..49689f71f4f1 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -169,6 +169,11 @@ static const struct irq_source_info_funcs pflip_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + static const struct irq_source_info_funcs vblank_irq_info_funcs = { .set = NULL, .ack = NULL @@ -228,12 +233,15 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = &pflip_irq_info_funcs\ } -#define vupdate_int_entry(reg_num)\ +/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic + * of DCE's DC_IRQ_SOURCE_VUPDATEx. + */ +#define vupdate_no_lock_int_entry(reg_num)\ [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_INT_EN,\ - OTG_GLOBAL_SYNC_STATUS, VUPDATE_EVENT_CLEAR),\ - .funcs = &vblank_irq_info_funcs\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\ + .funcs = &vupdate_no_lock_irq_info_funcs\ } #define vblank_int_entry(reg_num)\ @@ -340,12 +348,12 @@ irq_source_info_dcn30[DAL_IRQ_SOURCES_NUMBER] = { dc_underflow_int_entry(6), [DC_IRQ_SOURCE_DMCU_SCP] = dummy_irq_entry(), [DC_IRQ_SOURCE_VBIOS_SW] = dummy_irq_entry(), - vupdate_int_entry(0), - vupdate_int_entry(1), - vupdate_int_entry(2), - vupdate_int_entry(3), - vupdate_int_entry(4), - vupdate_int_entry(5), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), + vupdate_no_lock_int_entry(4), + vupdate_no_lock_int_entry(5), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 3cac170312fc..c6a8d6c54621 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -264,10 +264,9 @@ struct dmub_srv_hw_funcs { bool (*is_hw_init)(struct dmub_srv *dmub); - void (*enable_dmub_boot_options)(struct dmub_srv *dmub); - - union dmub_fw_boot_status (*get_fw_status)(struct dmub_srv *dmub); + bool (*is_phy_init)(struct dmub_srv *dmub); + bool (*is_auto_load_done)(struct dmub_srv *dmub); void (*set_gpint)(struct dmub_srv *dmub, union dmub_gpint_data_register reg); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 68b5fd811d26..513a5f8f817e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,10 +36,10 @@ /* 
Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0xf87bb940b -#define DMUB_FW_VERSION_MAJOR 1 +#define DMUB_FW_VERSION_GIT_HASH 0xf675c6448 +#define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 19 +#define DMUB_FW_VERSION_REVISION 24 #define DMUB_FW_VERSION_UCODE ((DMUB_FW_VERSION_MAJOR << 24) | (DMUB_FW_VERSION_MINOR << 16) | DMUB_FW_VERSION_REVISION) #endif @@ -123,12 +123,15 @@ union dmub_psr_debug_flags { * @fw_region_size: size of the firmware state region * @trace_buffer_size: size of the tracebuffer region * @fw_version: the firmware version information + * @dal_fw: 1 if the firmware is DAL */ struct dmub_fw_meta_info { uint32_t magic_value; uint32_t fw_region_size; uint32_t trace_buffer_size; uint32_t fw_version; + uint8_t dal_fw; + uint8_t reserved[3]; }; /* Ensure that the structure remains 64 bytes. */ @@ -151,15 +154,6 @@ union dmub_fw_meta { * SCRATCH15: FW Boot Options register */ -/** - * DMCUB firmware status bits for SCRATCH2. - */ -enum dmub_fw_status_bit { - DMUB_FW_STATUS_BIT_DAL_FIRMWARE = (1 << 0), - DMUB_FW_STATUS_BIT_COMMAND_TABLE_READY = (1 << 1), -}; - - /* Register bit definition for SCRATCH0 */ union dmub_fw_boot_status { struct { @@ -260,6 +254,11 @@ enum dmub_gpint_command { DMUB_GPINT__GET_FW_VERSION = 1, DMUB_GPINT__STOP_FW = 2, DMUB_GPINT__GET_PSR_STATE = 7, + /** + * DESC: Notifies DMCUB of the currently active streams. + * ARGS: Stream mask, 1 bit per active stream index. + */ + DMUB_GPINT__IDLE_OPT_NOTIFY_STREAM_MASK = 8, }; //============================================================================== diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index 0cd78e745e7e..2c4a2fe9311d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -312,18 +312,3 @@ uint32_t dmub_dcn20_get_gpint_response(struct dmub_srv *dmub) { return REG_READ(DMCUB_SCRATCH7); } - -union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct dmub_srv *dmub) -{ - union dmub_fw_boot_status status; - - status.all = REG_READ(DMCUB_SCRATCH0); - return status; -} - -void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub) -{ - union dmub_fw_boot_options boot_options = {0}; - - REG_WRITE(DMCUB_SCRATCH14, boot_options.all); -} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index a27b509cd6fd..a316f260f6ac 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -192,8 +192,4 @@ bool dmub_dcn20_is_gpint_acked(struct dmub_srv *dmub, uint32_t dmub_dcn20_get_gpint_response(struct dmub_srv *dmub); -void dmub_dcn20_enable_dmub_boot_options(struct dmub_srv *dmub); - -union dmub_fw_boot_status dmub_dcn20_get_fw_boot_status(struct dmub_srv *dmub); - #endif /* _DMUB_DCN20_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c index a6047673c3f5..e8f488232e34 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c @@ -51,4 +51,14 @@ const struct dmub_srv_common_regs dmub_srv_dcn21_regs = { #undef DMUB_SF }; +/* Shared functions. 
*/ +bool dmub_dcn21_is_auto_load_done(struct dmub_srv *dmub) +{ + return (REG_READ(DMCUB_SCRATCH0) == 3); +} + +bool dmub_dcn21_is_phy_init(struct dmub_srv *dmub) +{ + return REG_READ(DMCUB_SCRATCH10) == 0; +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h index 8c4033ae4007..2bbea237137b 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h @@ -32,4 +32,10 @@ extern const struct dmub_srv_common_regs dmub_srv_dcn21_regs; +/* Hardware functions. */ + +bool dmub_dcn21_is_auto_load_done(struct dmub_srv *dmub); + +bool dmub_dcn21_is_phy_init(struct dmub_srv *dmub); + #endif /* _DMUB_DCN21_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c index ba8d0bfb5522..215178b8d415 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c @@ -153,11 +153,22 @@ void dmub_dcn30_setup_windows(struct dmub_srv *dmub, offset = cw4->offset; - REG_WRITE(DMCUB_REGION4_OFFSET, offset.u.low_part); - REG_WRITE(DMCUB_REGION4_OFFSET_HIGH, offset.u.high_part); - REG_SET_2(DMCUB_REGION4_TOP_ADDRESS, 0, DMCUB_REGION4_TOP_ADDRESS, - cw4->region.top - cw4->region.base - 1, DMCUB_REGION4_ENABLE, - 1); + /* New firmware can support CW4. */ + if (dmub->fw_version > DMUB_FW_VERSION(1, 0, 10)) { + REG_WRITE(DMCUB_REGION3_CW4_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION3_CW4_OFFSET_HIGH, offset.u.high_part); + REG_WRITE(DMCUB_REGION3_CW4_BASE_ADDRESS, cw4->region.base); + REG_SET_2(DMCUB_REGION3_CW4_TOP_ADDRESS, 0, + DMCUB_REGION3_CW4_TOP_ADDRESS, cw4->region.top, + DMCUB_REGION3_CW4_ENABLE, 1); + } else { + REG_WRITE(DMCUB_REGION4_OFFSET, offset.u.low_part); + REG_WRITE(DMCUB_REGION4_OFFSET_HIGH, offset.u.high_part); + REG_SET_2(DMCUB_REGION4_TOP_ADDRESS, 0, + DMCUB_REGION4_TOP_ADDRESS, + cw4->region.top - cw4->region.base - 1, + DMCUB_REGION4_ENABLE, 1); + } offset = cw5->offset; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index aa41dfa23020..08da423b24a1 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -153,16 +153,18 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->set_gpint = dmub_dcn20_set_gpint; funcs->is_gpint_acked = dmub_dcn20_is_gpint_acked; funcs->get_gpint_response = dmub_dcn20_get_gpint_response; - funcs->get_fw_status = dmub_dcn20_get_fw_boot_status; - funcs->enable_dmub_boot_options = dmub_dcn20_enable_dmub_boot_options; - if (asic == DMUB_ASIC_DCN21) + if (asic == DMUB_ASIC_DCN21) { dmub->regs = &dmub_srv_dcn21_regs; + funcs->is_auto_load_done = dmub_dcn21_is_auto_load_done; + funcs->is_phy_init = dmub_dcn21_is_phy_init; + } #ifdef CONFIG_DRM_AMD_DC_DCN3_0 if (asic == DMUB_ASIC_DCN30) { dmub->regs = &dmub_srv_dcn30_regs; + funcs->is_auto_load_done = dmub_dcn30_is_auto_load_done; funcs->backdoor_load = dmub_dcn30_backdoor_load; funcs->setup_windows = dmub_dcn30_setup_windows; } @@ -462,10 +464,6 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, dmub_rb_init(&dmub->inbox1_rb, &rb_params); } - /* Report to DMUB what features are supported by current driver */ - if (dmub->hw_funcs.enable_dmub_boot_options) - dmub->hw_funcs.enable_dmub_boot_options(dmub); - if (dmub->hw_funcs.reset_release) dmub->hw_funcs.reset_release(dmub); @@ -526,10 +524,11 @@ enum dmub_status 
dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i <= timeout_us; i += 100) { - union dmub_fw_boot_status status = dmub->hw_funcs.get_fw_status(dmub); + if (!dmub->hw_funcs.is_auto_load_done) + return DMUB_STATUS_OK; - if (status.bits.dal_fw && status.bits.mailbox_rdy) + for (i = 0; i <= timeout_us; i += 100) { + if (dmub->hw_funcs.is_auto_load_done(dmub)) return DMUB_STATUS_OK; udelay(100); @@ -538,6 +537,27 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } +enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, + uint32_t timeout_us) +{ + uint32_t i = 0; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (!dmub->hw_funcs.is_phy_init) + return DMUB_STATUS_OK; + + for (i = 0; i <= timeout_us; i += 10) { + if (dmub->hw_funcs.is_phy_init(dmub)) + return DMUB_STATUS_OK; + + udelay(10); + } + + return DMUB_STATUS_TIMEOUT; +} + enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index d66f9d8eefb4..21bbee17c527 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -67,6 +67,7 @@ #define DC_LOG_ALL_GAMMA(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_ALL_TF_CHANNELS(...) pr_debug("[GAMMA]:"__VA_ARGS__) #define DC_LOG_DSC(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SMU(...) pr_debug("[SMU_MSG]:"__VA_ARGS__) #define DC_LOG_DWB(...) DRM_DEBUG_KMS(__VA_ARGS__) struct dal_logger; @@ -113,6 +114,7 @@ enum dc_log_type { LOG_DISPLAYSTATS, LOG_HDMI_RETIMER_REDRIVER, LOG_DSC, + LOG_SMU_MSG, LOG_DWB, LOG_GAMMA_DEBUG, LOG_MAX_HW_POINTS, @@ -147,11 +149,11 @@ enum dc_log_type { (1ULL << LOG_I2C_AUX) | \ (1ULL << LOG_IF_TRACE) | \ (1ULL << LOG_HDMI_FRL) | \ + (1ULL << LOG_SCALER) | \ (1ULL << LOG_DTN) /* | \ (1ULL << LOG_DEBUG) | \ (1ULL << LOG_BIOS) | \ (1ULL << LOG_SURFACE) | \ - (1ULL << LOG_SCALER) | \ (1ULL << LOG_DML) | \ (1ULL << LOG_HW_LINK_TRAINING) | \ (1ULL << LOG_HW_AUDIO)| \ diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index eb7421e83b86..d3a5ba9ee782 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -32,7 +32,7 @@ #define MOD_FREESYNC_MAX_CONCURRENT_STREAMS 32 -#define MIN_REFRESH_RANGE_IN_US 10000000 +#define MIN_REFRESH_RANGE 10 /* Refresh rate ramp at a fixed rate of 65 Hz/second */ #define STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME ((1000 / 60) * 65) /* Number of elements in the render times cache array */ @@ -790,7 +790,7 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, * Check if Freesync is supported. Return if false. 
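The reworked dmub_srv_wait_for_auto_load() and the new dmub_srv_wait_for_phy_init() in the hunk above share one shape: if the ASIC registered no readiness hook the wait trivially succeeds, otherwise a scratch-register predicate (SCRATCH0 == 3 for auto-load, SCRATCH10 == 0 for PHY init on DCN2.1) is polled at a fixed step until a timeout elapses. A minimal stand-alone sketch of that pattern, with a stubbed register read in place of REG_READ (all names here are hypothetical, not driver API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum status { STATUS_OK, STATUS_TIMEOUT };

/* Stub for REG_READ(DMCUB_SCRATCH0); reports "auto load done" (3)
 * after a few polls. */
static uint32_t scratch0_stub(void)
{
    static int polls;
    return (++polls >= 4) ? 3 : 0;
}

static bool is_auto_load_done(void)
{
    return scratch0_stub() == 3;  /* same sentinel the DCN2.1 hook tests */
}

/* Poll a readiness predicate every step_us until timeout_us elapses.
 * The kernel version sleeps with udelay(step_us) between polls. */
static enum status wait_for(bool (*done)(void),
                            uint32_t timeout_us, uint32_t step_us)
{
    uint32_t i;

    if (!done)  /* no hook registered: nothing to wait for */
        return STATUS_OK;

    for (i = 0; i <= timeout_us; i += step_us) {
        if (done())
            return STATUS_OK;
    }
    return STATUS_TIMEOUT;
}

int main(void)
{
    printf("%s\n", wait_for(is_auto_load_done, 100000, 100) ==
           STATUS_OK ? "OK" : "TIMEOUT");
    return 0;
}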
If true, * set the corresponding bit in the info packet */ - if (!vrr->supported || (!vrr->send_info_frame)) + if (!vrr->send_info_frame) return; switch (packet_type) { @@ -878,8 +878,8 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, else in_out_vrr->fixed_refresh_in_uhz = 0; - refresh_range = in_out_vrr->max_refresh_in_uhz - - in_out_vrr->min_refresh_in_uhz; + refresh_range = div_u64(in_out_vrr->max_refresh_in_uhz + 500000, 1000000) - + div_u64(in_out_vrr->min_refresh_in_uhz + 500000, 1000000); in_out_vrr->supported = true; } @@ -918,7 +918,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->adjust.v_total_min = stream->timing.v_total; in_out_vrr->adjust.v_total_max = stream->timing.v_total; } else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && - refresh_range >= MIN_REFRESH_RANGE_IN_US) { + refresh_range >= MIN_REFRESH_RANGE) { in_out_vrr->adjust.v_total_min = calc_v_total_from_refresh(stream, @@ -1105,16 +1105,10 @@ unsigned long long mod_freesync_calc_nominal_field_rate( return nominal_field_rate_in_uhz; } -bool mod_freesync_is_valid_range(struct mod_freesync *mod_freesync, - const struct dc_stream_state *stream, - uint32_t min_refresh_cap_in_uhz, +bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz, uint32_t max_refresh_cap_in_uhz, - uint32_t min_refresh_request_in_uhz, - uint32_t max_refresh_request_in_uhz) + uint32_t nominal_field_rate_in_uhz) { - /* Calculate nominal field rate for stream */ - unsigned long long nominal_field_rate_in_uhz = - mod_freesync_calc_nominal_field_rate(stream); /* Typically nominal refresh calculated can have some fractional part. * Allow for some rounding error of actual video timing by taking floor * of caps and request. @@ -1153,8 +1147,6 @@ bool mod_freesync_is_valid_range(struct mod_freesync *mod_freesync, div_u64(nominal_field_rate_in_uhz + 500000, 1000000); min_refresh_cap_in_uhz /= 1000000; max_refresh_cap_in_uhz /= 1000000; - min_refresh_request_in_uhz /= 1000000; - max_refresh_request_in_uhz /= 1000000; // Check nominal is within range if (nominal_field_rate_in_uhz > max_refresh_cap_in_uhz || @@ -1165,23 +1157,12 @@ bool mod_freesync_is_valid_range(struct mod_freesync *mod_freesync, if (nominal_field_rate_in_uhz < max_refresh_cap_in_uhz) max_refresh_cap_in_uhz = nominal_field_rate_in_uhz; - // Don't allow min > max - if (min_refresh_request_in_uhz > max_refresh_request_in_uhz) - return false; - // Check min is within range - if (min_refresh_request_in_uhz > max_refresh_cap_in_uhz || - min_refresh_request_in_uhz < min_refresh_cap_in_uhz) - return false; - - // Check max is within range - if (max_refresh_request_in_uhz > max_refresh_cap_in_uhz || - max_refresh_request_in_uhz < min_refresh_cap_in_uhz) + if (min_refresh_cap_in_uhz > max_refresh_cap_in_uhz) return false; // For variable range, check for at least 10 Hz range - if ((max_refresh_request_in_uhz != min_refresh_request_in_uhz) && - (max_refresh_request_in_uhz - min_refresh_request_in_uhz < 10)) + if (nominal_field_rate_in_uhz - min_refresh_cap_in_uhz < 10) return false; return true; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index 0ba3cf7f336a..c80fc10d732c 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -170,12 +170,9 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, unsigned long long mod_freesync_calc_nominal_field_rate( const struct
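The refresh_range rework above switches the comparison from microhertz to whole hertz: each uHz endpoint is rounded to the nearest Hz by adding half a hertz (500000 uHz) before the divide, and the difference is then checked against the 10 Hz MIN_REFRESH_RANGE. A stand-alone illustration of that rounding, with plain division standing in for the kernel's div_u64 (the example panel range is invented):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_REFRESH_RANGE 10  /* whole Hz, as in the hunk above */

/* Round a micro-hertz value to the nearest whole hertz,
 * mirroring div_u64(x + 500000, 1000000). */
static uint64_t uhz_to_hz(uint64_t uhz)
{
    return (uhz + 500000) / 1000000;
}

int main(void)
{
    /* e.g. a 40.000517 Hz min and 60.000775 Hz max panel range */
    uint64_t min_uhz = 40000517, max_uhz = 60000775;
    uint64_t range = uhz_to_hz(max_uhz) - uhz_to_hz(min_uhz);

    printf("range = %" PRIu64 " Hz -> VRR %s\n", range,
           range >= MIN_REFRESH_RANGE ? "usable" : "too narrow");
    return 0;
}

The half-unit bias keeps a nominal 59.999 Hz panel from being truncated to 59 Hz, which is why the floor-only division of the old uHz comparison was dropped.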
dc_stream_state *stream); -bool mod_freesync_is_valid_range(struct mod_freesync *mod_freesync, - const struct dc_stream_state *stream, - uint32_t min_refresh_cap_in_uhz, +bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz, uint32_t max_refresh_cap_in_uhz, - uint32_t min_refresh_request_in_uhz, - uint32_t max_refresh_request_in_uhz); + uint32_t nominal_field_rate_in_uhz); diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index b852248b2da8..c2544c81dfb2 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1116,6 +1116,35 @@ struct atom_14nm_combphy_tmds_vs_set uint8_t margin_deemph_lane0__deemph_sel_val; }; +struct atom_DCN_dpphy_dvihdmi_tuningset +{ + uint32_t max_symclk_in10khz; + uint8_t encoder_mode; //atom_encode_mode_def, =2: DVI, =3: HDMI mode + uint8_t phy_sel; //bit vector of phy, bit0= phya, bit1=phyb, ....bit5 = phyf + uint8_t tx_eq_main; // map to RDPCSTX_PHY_FUSE0/1/2/3[5:0](EQ_MAIN) + uint8_t tx_eq_pre; // map to RDPCSTX_PHY_FUSE0/1/2/3[11:6](EQ_PRE) + uint8_t tx_eq_post; // map to RDPCSTX_PHY_FUSE0/1/2/3[17:12](EQ_POST) + uint8_t reserved1; + uint8_t tx_vboost_lvl; // tx_vboost_lvl, map to RDPCSTX_PHY_CNTL0.RDPCS_PHY_TX_VBOOST_LVL + uint8_t reserved2; +}; + +struct atom_DCN_dpphy_dp_setting{ + uint8_t dp_vs_pemph_level; //enum of atom_dp_vs_preemph_def + uint8_t tx_eq_main; // map to RDPCSTX_PHY_FUSE0/1/2/3[5:0](EQ_MAIN) + uint8_t tx_eq_pre; // map to RDPCSTX_PHY_FUSE0/1/2/3[11:6](EQ_PRE) + uint8_t tx_eq_post; // map to RDPCSTX_PHY_FUSE0/1/2/3[17:12](EQ_POST) + uint8_t tx_vboost_lvl; // tx_vboost_lvl, map to RDPCSTX_PHY_CNTL0.RDPCS_PHY_TX_VBOOST_LVL +}; + +struct atom_DCN_dpphy_dp_tuningset{ + uint8_t phy_sel; // bit vector of phy, bit0= phya, bit1=phyb, ....bit5 = phyf + uint8_t version; + uint16_t table_size; // size of atom_14nm_dpphy_dp_setting + uint16_t reserved; + struct atom_DCN_dpphy_dp_setting dptunings[10]; +}; + struct atom_i2c_reg_info { uint8_t ucI2cRegIndex; uint8_t ucI2cRegVal; @@ -1178,6 +1207,55 @@ struct atom_integrated_system_info_v1_11 uint32_t reserved[66]; }; +struct atom_integrated_system_info_v1_12 +{ + struct atom_common_table_header table_header; + uint32_t vbios_misc; //enum of atom_system_vbiosmisc_def + uint32_t gpucapinfo; //enum of atom_system_gpucapinf_def + uint32_t system_config; + uint32_t cpucapinfo; + uint16_t gpuclk_ss_percentage; //unit of 0.001%, 1000 mean 1% + uint16_t gpuclk_ss_type; + uint16_t lvds_ss_percentage; //unit of 0.001%, 1000 mean 1% + uint16_t lvds_ss_rate_10hz; + uint16_t hdmi_ss_percentage; //unit of 0.001%, 1000 mean 1% + uint16_t hdmi_ss_rate_10hz; + uint16_t dvi_ss_percentage; //unit of 0.001%, 1000 mean 1% + uint16_t dvi_ss_rate_10hz; + uint16_t dpphy_override; // bit vector, enum of atom_sysinfo_dpphy_override_def + uint16_t lvds_misc; // enum of atom_sys_info_lvds_misc_def + uint16_t backlight_pwm_hz; // pwm frequency in hz + uint8_t memorytype; // enum of atom_dmi_t17_mem_type_def, APU memory type indication. 
+ uint8_t umachannelnumber; // number of memory channels + uint8_t pwr_on_digon_to_de; // all pwr sequence numbers below are in unit of 4ms // + uint8_t pwr_on_de_to_vary_bl; + uint8_t pwr_down_vary_bloff_to_de; + uint8_t pwr_down_de_to_digoff; + uint8_t pwr_off_delay; + uint8_t pwr_on_vary_bl_to_blon; + uint8_t pwr_down_bloff_to_vary_bloff; + uint8_t min_allowed_bl_level; + uint8_t htc_hyst_limit; + uint8_t htc_tmp_limit; + uint8_t reserved1; + uint8_t reserved2; + struct atom_external_display_connection_info extdispconninfo; + struct atom_DCN_dpphy_dvihdmi_tuningset TMDS_tuningset; + struct atom_DCN_dpphy_dvihdmi_tuningset hdmiCLK5_tuningset; + struct atom_DCN_dpphy_dvihdmi_tuningset hdmiCLK8_tuningset; + struct atom_DCN_dpphy_dp_tuningset rbr_tuningset; // rbr 1.62G dp tuning set + struct atom_DCN_dpphy_dp_tuningset hbr3_tuningset; // HBR3 dp tuning set + struct atom_camera_data camera_info; + struct atom_hdmi_retimer_redriver_set dp0_retimer_set; //for DP0 + struct atom_hdmi_retimer_redriver_set dp1_retimer_set; //for DP1 + struct atom_hdmi_retimer_redriver_set dp2_retimer_set; //for DP2 + struct atom_hdmi_retimer_redriver_set dp3_retimer_set; //for DP3 + struct atom_DCN_dpphy_dp_tuningset hbr_tuningset; //hbr 2.7G dp tuning set + struct atom_DCN_dpphy_dp_tuningset hbr2_tuningset; //hbr2 5.4G dp tuning set + struct atom_DCN_dpphy_dp_tuningset edp_tunings; //edp tuning set + struct atom_DCN_dpphy_dvihdmi_tuningset hdmiCLK6_tuningset; + uint32_t reserved[63]; +}; // system_config enum atom_system_vbiosmisc_def{ diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index fe4948aa662f..03125c8a2145 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -33,6 +33,7 @@ #include "navi10_ppt.h" #include "sienna_cichlid_ppt.h" #include "renoir_ppt.h" +#include "amd_pcie.h" /* * DO NOT use these for err/warn/info/debug messages.
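Per the header comment, the pwr_* panel power-sequencing fields of atom_integrated_system_info_v1_12 above are stored as counts of 4 ms units, so a consumer has to scale them before programming delays. A minimal sketch of that conversion (the helper name and example value are hypothetical; the *4 scaling follows the comment in the struct, not observed driver code):

#include <stdint.h>
#include <stdio.h>

/* Per the atomfirmware comment, pwr_* fields count 4 ms units. */
static unsigned int atom_pwr_seq_to_ms(uint8_t units)
{
    return (unsigned int)units * 4;
}

int main(void)
{
    uint8_t pwr_on_digon_to_de = 8;  /* example VBIOS value */

    printf("DIGON -> DE delay: %u ms\n",
           atom_pwr_seq_to_ms(pwr_on_digon_to_de));
    return 0;
}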
@@ -238,169 +239,48 @@ int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t return ret; } -int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max, bool lock_needed) +int smu_set_soft_freq_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t min, + uint32_t max) { int ret = 0; if (!smu_clk_dpm_is_enabled(smu, clk_type)) return 0; - if (lock_needed) - mutex_lock(&smu->mutex); - ret = smu_set_soft_freq_limited_range(smu, clk_type, min, max); - if (lock_needed) - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max) -{ - int ret = 0, clk_id = 0; - uint32_t param; - - if (min <= 0 && max <= 0) - return -EINVAL; - - if (!smu_clk_dpm_is_enabled(smu, clk_type)) - return 0; - - clk_id = smu_clk_get_index(smu, clk_type); - if (clk_id < 0) - return clk_id; - - if (max > 0) { - param = (uint32_t)((clk_id << 16) | (max & 0xffff)); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMaxByFreq, - param, NULL); - if (ret) - return ret; - } + mutex_lock(&smu->mutex); - if (min > 0) { - param = (uint32_t)((clk_id << 16) | (min & 0xffff)); - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinByFreq, - param, NULL); - if (ret) - return ret; - } + if (smu->ppt_funcs->set_soft_freq_limited_range) + ret = smu->ppt_funcs->set_soft_freq_limited_range(smu, + clk_type, + min, + max); + mutex_unlock(&smu->mutex); return ret; } -int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max, bool lock_needed) +int smu_get_dpm_freq_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) { - uint32_t clock_limit; int ret = 0; if (!min && !max) return -EINVAL; - if (lock_needed) - mutex_lock(&smu->mutex); - - if (!smu_clk_dpm_is_enabled(smu, clk_type)) { - switch (clk_type) { - case SMU_MCLK: - case SMU_UCLK: - clock_limit = smu->smu_table.boot_values.uclk; - break; - case SMU_GFXCLK: - case SMU_SCLK: - clock_limit = smu->smu_table.boot_values.gfxclk; - break; - case SMU_SOCCLK: - clock_limit = smu->smu_table.boot_values.socclk; - break; - default: - clock_limit = 0; - break; - } - - /* clock in Mhz unit */ - if (min) - *min = clock_limit / 100; - if (max) - *max = clock_limit / 100; - } else { - /* - * Todo: Use each asic(ASIC_ppt funcs) control the callbacks exposed to the - * core driver and then have helpers for stuff that is common(SMU_v11_x | SMU_v12_x funcs). 
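After the rework above, smu_set_soft_freq_range() and smu_get_dpm_freq_range() do nothing but take smu->mutex, dispatch to the per-ASIC ppt_funcs hook when one is registered, and drop the lock; the generic message-sending fallbacks (hard freq range, freq-by-index, level count/range) are deleted in favor of the ASIC implementations. A reduced stand-alone model of that guarded-dispatch shape (types simplified, all names hypothetical):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Shape of the reworked wrappers: lock, dispatch if the hook exists,
 * unlock. Missing hooks simply return 0, as in the driver. */
struct ppt_funcs {
    int (*set_soft_freq_limited_range)(uint32_t min, uint32_t max);
};

struct smu_ctx {
    pthread_mutex_t mutex;
    const struct ppt_funcs *ppt_funcs;
};

static int set_soft_freq_range(struct smu_ctx *smu, uint32_t min, uint32_t max)
{
    int ret = 0;

    pthread_mutex_lock(&smu->mutex);
    if (smu->ppt_funcs->set_soft_freq_limited_range)
        ret = smu->ppt_funcs->set_soft_freq_limited_range(min, max);
    pthread_mutex_unlock(&smu->mutex);
    return ret;
}

static int demo_hook(uint32_t min, uint32_t max)
{
    printf("soft range: %u..%u MHz\n", min, max);
    return 0;
}

int main(void)
{
    const struct ppt_funcs funcs = {
        .set_soft_freq_limited_range = demo_hook,
    };
    struct smu_ctx smu = { PTHREAD_MUTEX_INITIALIZER, &funcs };

    return set_soft_freq_range(&smu, 500, 1800);
}

Compile with -pthread. Pushing the locking into the wrapper is what lets the lock_needed parameter disappear from the exported API in the hunks that follow.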
- */ - ret = smu_get_dpm_ultimate_freq(smu, clk_type, min, max); - } - - if (lock_needed) - mutex_unlock(&smu->mutex); - - return ret; -} - -int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, - uint16_t level, uint32_t *value) -{ - int ret = 0, clk_id = 0; - uint32_t param; - - if (!value) - return -EINVAL; - - if (!smu_clk_dpm_is_enabled(smu, clk_type)) - return 0; - - clk_id = smu_clk_get_index(smu, clk_type); - if (clk_id < 0) - return clk_id; - - param = (uint32_t)(((clk_id & 0xffff) << 16) | (level & 0xffff)); - - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDpmFreqByIndex, - param, value); - if (ret) - return ret; - - /* BIT31: 0 - Fine grained DPM, 1 - Dicrete DPM - * now, we un-support it */ - *value = *value & 0x7fffffff; - - return ret; -} - -int smu_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *value) -{ - return smu_get_dpm_freq_by_index(smu, clk_type, 0xff, value); -} - -int smu_get_dpm_level_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min_value, uint32_t *max_value) -{ - int ret = 0; - uint32_t level_count = 0; - - if (!min_value && !max_value) - return -EINVAL; - - if (min_value) { - /* by default, level 0 clock value as min value */ - ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, min_value); - if (ret) - return ret; - } + mutex_lock(&smu->mutex); - if (max_value) { - ret = smu_get_dpm_level_count(smu, clk_type, &level_count); - if (ret) - return ret; + if (smu->ppt_funcs->get_dpm_ultimate_freq) + ret = smu->ppt_funcs->get_dpm_ultimate_freq(smu, + clk_type, + min, + max); - ret = smu_get_dpm_freq_by_index(smu, clk_type, level_count - 1, max_value); - if (ret) - return ret; - } + mutex_unlock(&smu->mutex); return ret; } @@ -722,6 +602,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) smu->od_enabled =false; break; case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: sienna_cichlid_set_ppt_funcs(smu); break; case CHIP_RENOIR: @@ -1111,9 +992,37 @@ static int smu_sw_fini(void *handle) return 0; } +static int smu_get_thermal_temperature_range(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + struct smu_temperature_range *range = + &smu->thermal_range; + int ret = 0; + + if (!smu->ppt_funcs->get_thermal_temperature_range) + return 0; + + ret = smu->ppt_funcs->get_thermal_temperature_range(smu, range); + if (ret) + return ret; + + adev->pm.dpm.thermal.min_temp = range->min; + adev->pm.dpm.thermal.max_temp = range->max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range->edge_emergency_max; + adev->pm.dpm.thermal.min_hotspot_temp = range->hotspot_min; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range->hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range->hotspot_emergency_max; + adev->pm.dpm.thermal.min_mem_temp = range->mem_min; + adev->pm.dpm.thermal.max_mem_crit_temp = range->mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range->mem_emergency_max; + + return ret; +} + static int smu_smc_hw_setup(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; + uint32_t pcie_gen = 0, pcie_width = 0; int ret; if (smu_is_dpm_running(smu) && adev->in_suspend) { @@ -1183,9 +1092,42 @@ static int smu_smc_hw_setup(struct smu_context *smu) if (!smu_is_dpm_running(smu)) dev_info(adev->dev, "dpm has been disabled\n"); - ret = smu_override_pcie_parameters(smu); - if (ret) + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & 
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds to x1 to x32 + */ + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + ret = smu_update_pcie_parameters(smu, pcie_gen, pcie_width); + if (ret) { + dev_err(adev->dev, "Attempt to override pcie params failed!\n"); + return ret; + } + + ret = smu_get_thermal_temperature_range(smu); + if (ret) { + dev_err(adev->dev, "Failed to get thermal temperature ranges!\n"); + return ret; + } ret = smu_enable_thermal_alert(smu); if (ret) { @@ -1885,8 +1827,7 @@ int smu_set_display_count(struct smu_context *smu, uint32_t count) int smu_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t mask, - bool lock_needed) + uint32_t mask) { struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); int ret = 0; @@ -1899,14 +1840,12 @@ int smu_force_clk_levels(struct smu_context *smu, return -EINVAL; } - if (lock_needed) - mutex_lock(&smu->mutex); + mutex_lock(&smu->mutex); if (smu->ppt_funcs && smu->ppt_funcs->force_clk_levels) ret = smu->ppt_funcs->force_clk_levels(smu, clk_type, mask); - if (lock_needed) - mutex_unlock(&smu->mutex); + mutex_unlock(&smu->mutex); return ret; } @@ -2289,6 +2228,8 @@ int smu_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) { + struct smu_umd_pstate_table *pstate_table = + &smu->pstate_table; int ret = 0; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) @@ -2301,11 +2242,11 @@ int smu_read_sensor(struct smu_context *smu, switch (sensor) { case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK: - *((uint32_t *)data) = smu->pstate_sclk; + *((uint32_t *)data) = pstate_table->gfxclk_pstate.standard * 100; *size = 4; break; case AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK: - *((uint32_t *)data) = smu->pstate_mclk; + *((uint32_t *)data) = pstate_table->uclk_pstate.standard * 100; *size = 4; break; case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: @@ -2737,6 +2678,40 @@ int smu_baco_exit(struct smu_context *smu) return ret; } +bool smu_mode1_reset_is_support(struct smu_context *smu) +{ + bool ret = false; + + if (!smu->pm_enabled) + return false; + + mutex_lock(&smu->mutex); + + if (smu->ppt_funcs && smu->ppt_funcs->mode1_reset_is_support) + ret = smu->ppt_funcs->mode1_reset_is_support(smu); + + mutex_unlock(&smu->mutex); + + return ret; +} + +int smu_mode1_reset(struct smu_context *smu) +{ + int ret = 0; + + if (!smu->pm_enabled) + return -EOPNOTSUPP; + + mutex_lock(&smu->mutex); + + if (smu->ppt_funcs->mode1_reset) + ret = smu->ppt_funcs->mode1_reset(smu); + + mutex_unlock(&smu->mutex); + + return ret; +} + int smu_mode2_reset(struct smu_context *smu) { int ret = 0; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index d93f8a43a96f..56dc20a617fd
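The comment in the hunk above documents how the PCIe override word is laid out: LCLK DPM level in bits 31:16, PCIe generation (0..3 for GEN1..GEN4) in bits 15:8, and the lane-width code (1..7, where 6 means x16) in bits 7:0. smu_update_pcie_parameters() itself is not shown in this patch, so the packing helper below is inferred from that comment rather than copied from the implementation:

#include <stdint.h>
#include <stdio.h>

/* Pack the override word per the documented layout (inferred, not
 * taken from smu_update_pcie_parameters). */
static uint32_t pack_pcie_arg(uint16_t lclk_dpm, uint8_t gen, uint8_t width)
{
    return ((uint32_t)lclk_dpm << 16) | ((uint32_t)gen << 8) | width;
}

int main(void)
{
    /* GEN4-capable link at x16: pcie_gen = 3, pcie_width = 6 above */
    printf("arg = 0x%08x\n", pack_pcie_arg(0, 3, 6));  /* 0x00000306 */
    return 0;
}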
100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -56,10 +56,6 @@ #define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) -#define CTF_OFFSET_EDGE 5 -#define CTF_OFFSET_HOTSPOT 5 -#define CTF_OFFSET_HBM 5 - #define MSG_MAP(msg, index, valid_in_vf) \ [SMU_MSG_##msg] = {1, (index), (valid_in_vf)} #define ARCTURUS_FEA_MAP(smu_feature, arcturus_feature) \ @@ -291,7 +287,6 @@ static int arcturus_get_pwr_src_index(struct smu_context *smc, uint32_t index) return mapping.map_to; } - static int arcturus_get_workload_type(struct smu_context *smu, enum PP_SMC_POWER_PROFILE profile) { struct smu_11_0_cmn2aisc_mapping mapping; @@ -338,23 +333,11 @@ static int arcturus_allocate_dpm_context(struct smu_context *smu) { struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - if (smu_dpm->dpm_context) - return -EINVAL; - - smu_dpm->dpm_context = kzalloc(sizeof(struct arcturus_dpm_table), + smu_dpm->dpm_context = kzalloc(sizeof(struct smu_11_0_dpm_context), GFP_KERNEL); if (!smu_dpm->dpm_context) return -ENOMEM; - - if (smu_dpm->golden_dpm_context) - return -EINVAL; - - smu_dpm->golden_dpm_context = kzalloc(sizeof(struct arcturus_dpm_table), - GFP_KERNEL); - if (!smu_dpm->golden_dpm_context) - return -ENOMEM; - - smu_dpm->dpm_context_size = sizeof(struct arcturus_dpm_table); + smu_dpm->dpm_context_size = sizeof(struct smu_11_0_dpm_context); smu_dpm->dpm_current_power_state = kzalloc(sizeof(struct smu_power_state), GFP_KERNEL); @@ -382,119 +365,84 @@ arcturus_get_allowed_feature_mask(struct smu_context *smu, return 0; } -static int -arcturus_set_single_dpm_table(struct smu_context *smu, - struct arcturus_single_dpm_table *single_dpm_table, - PPCLK_e clk_id) -{ - int ret = 0; - uint32_t i, num_of_levels = 0, clk; - - ret = smu_send_smc_msg_with_param(smu, - SMU_MSG_GetDpmFreqByIndex, - (clk_id << 16 | 0xFF), - &num_of_levels); - if (ret) { - dev_err(smu->adev->dev, "[%s] failed to get dpm levels!\n", __func__); - return ret; - } - - single_dpm_table->count = num_of_levels; - for (i = 0; i < num_of_levels; i++) { - ret = smu_send_smc_msg_with_param(smu, - SMU_MSG_GetDpmFreqByIndex, - (clk_id << 16 | i), - &clk); - if (ret) { - dev_err(smu->adev->dev, "[%s] failed to get dpm freq by index!\n", __func__); - return ret; - } - single_dpm_table->dpm_levels[i].value = clk; - single_dpm_table->dpm_levels[i].enabled = true; - } - return 0; -} - -static void arcturus_init_single_dpm_state(struct arcturus_dpm_state *dpm_state) -{ - dpm_state->soft_min_level = 0x0; - dpm_state->soft_max_level = 0xffff; - dpm_state->hard_min_level = 0x0; - dpm_state->hard_max_level = 0xffff; -} - static int arcturus_set_default_dpm_table(struct smu_context *smu) { - int ret; - - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct arcturus_dpm_table *dpm_table = NULL; - struct arcturus_single_dpm_table *single_dpm_table; - - dpm_table = smu_dpm->dpm_context; + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + PPTable_t *driver_ppt = smu->smu_table.driver_pptable; + struct smu_11_0_dpm_table *dpm_table = NULL; + int ret = 0; - /* socclk */ - single_dpm_table = &(dpm_table->soc_table); + /* socclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.soc_table; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { - ret = arcturus_set_single_dpm_table(smu, single_dpm_table, - PPCLK_SOCCLK); - if (ret) { - dev_err(smu->adev->dev, "[%s] failed to get socclk dpm levels!\n", __func__); + ret = smu_v11_0_set_single_dpm_table(smu, 
+ SMU_SOCCLK, + dpm_table); + if (ret) return ret; - } + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_SOCCLK].SnapToDiscrete; } else { - single_dpm_table->count = 1; - single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.socclk / 100; + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.socclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } - arcturus_init_single_dpm_state(&(single_dpm_table->dpm_state)); - /* gfxclk */ - single_dpm_table = &(dpm_table->gfx_table); + /* gfxclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.gfx_table; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { - ret = arcturus_set_single_dpm_table(smu, single_dpm_table, - PPCLK_GFXCLK); - if (ret) { - dev_err(smu->adev->dev, "[SetupDefaultDpmTable] failed to get gfxclk dpm levels!"); + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_GFXCLK, + dpm_table); + if (ret) return ret; - } + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_GFXCLK].SnapToDiscrete; } else { - single_dpm_table->count = 1; - single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } - arcturus_init_single_dpm_state(&(single_dpm_table->dpm_state)); - /* memclk */ - single_dpm_table = &(dpm_table->mem_table); + /* memclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.uclk_table; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = arcturus_set_single_dpm_table(smu, single_dpm_table, - PPCLK_UCLK); - if (ret) { - dev_err(smu->adev->dev, "[SetupDefaultDpmTable] failed to get memclk dpm levels!"); + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_UCLK, + dpm_table); + if (ret) return ret; - } + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_UCLK].SnapToDiscrete; } else { - single_dpm_table->count = 1; - single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.uclk / 100; + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.uclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } - arcturus_init_single_dpm_state(&(single_dpm_table->dpm_state)); - /* fclk */ - single_dpm_table = &(dpm_table->fclk_table); + /* fclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.fclk_table; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_FCLK_BIT)) { - ret = arcturus_set_single_dpm_table(smu, single_dpm_table, - PPCLK_FCLK); - if (ret) { - dev_err(smu->adev->dev, "[SetupDefaultDpmTable] failed to get fclk dpm levels!"); + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_FCLK, + dpm_table); + if (ret) return ret; - } + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_FCLK].SnapToDiscrete; } else { - single_dpm_table->count = 1; - single_dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.fclk / 100; + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.fclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } - 
arcturus_init_single_dpm_state(&(single_dpm_table->dpm_state)); - - memcpy(smu_dpm->golden_dpm_context, dpm_table, - sizeof(struct arcturus_dpm_table)); return 0; } @@ -596,33 +544,50 @@ static int arcturus_run_btc(struct smu_context *smu) static int arcturus_populate_umd_state_clk(struct smu_context *smu) { - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct arcturus_dpm_table *dpm_table = NULL; - struct arcturus_single_dpm_table *gfx_table = NULL; - struct arcturus_single_dpm_table *mem_table = NULL; - - dpm_table = smu_dpm->dpm_context; - gfx_table = &(dpm_table->gfx_table); - mem_table = &(dpm_table->mem_table); - - smu->pstate_sclk = gfx_table->dpm_levels[0].value; - smu->pstate_mclk = mem_table->dpm_levels[0].value; + struct smu_11_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_11_0_dpm_table *gfx_table = + &dpm_context->dpm_tables.gfx_table; + struct smu_11_0_dpm_table *mem_table = + &dpm_context->dpm_tables.uclk_table; + struct smu_11_0_dpm_table *soc_table = + &dpm_context->dpm_tables.soc_table; + struct smu_umd_pstate_table *pstate_table = + &smu->pstate_table; + + pstate_table->gfxclk_pstate.min = gfx_table->min; + pstate_table->gfxclk_pstate.peak = gfx_table->max; + + pstate_table->uclk_pstate.min = mem_table->min; + pstate_table->uclk_pstate.peak = mem_table->max; + + pstate_table->socclk_pstate.min = soc_table->min; + pstate_table->socclk_pstate.peak = soc_table->max; if (gfx_table->count > ARCTURUS_UMD_PSTATE_GFXCLK_LEVEL && - mem_table->count > ARCTURUS_UMD_PSTATE_MCLK_LEVEL) { - smu->pstate_sclk = gfx_table->dpm_levels[ARCTURUS_UMD_PSTATE_GFXCLK_LEVEL].value; - smu->pstate_mclk = mem_table->dpm_levels[ARCTURUS_UMD_PSTATE_MCLK_LEVEL].value; + mem_table->count > ARCTURUS_UMD_PSTATE_MCLK_LEVEL && + soc_table->count > ARCTURUS_UMD_PSTATE_SOCCLK_LEVEL) { + pstate_table->gfxclk_pstate.standard = + gfx_table->dpm_levels[ARCTURUS_UMD_PSTATE_GFXCLK_LEVEL].value; + pstate_table->uclk_pstate.standard = + mem_table->dpm_levels[ARCTURUS_UMD_PSTATE_MCLK_LEVEL].value; + pstate_table->socclk_pstate.standard = + soc_table->dpm_levels[ARCTURUS_UMD_PSTATE_SOCCLK_LEVEL].value; + } else { + pstate_table->gfxclk_pstate.standard = + pstate_table->gfxclk_pstate.min; + pstate_table->uclk_pstate.standard = + pstate_table->uclk_pstate.min; + pstate_table->socclk_pstate.standard = + pstate_table->socclk_pstate.min; } - smu->pstate_sclk = smu->pstate_sclk * 100; - smu->pstate_mclk = smu->pstate_mclk * 100; - return 0; } static int arcturus_get_clk_table(struct smu_context *smu, struct pp_clock_levels_with_latency *clocks, - struct arcturus_single_dpm_table *dpm_table) + struct smu_11_0_dpm_table *dpm_table) { int i, count; @@ -644,30 +609,204 @@ static int arcturus_freqs_in_same_level(int32_t frequency1, return (abs(frequency1 - frequency2) <= EPSILON); } +static int arcturus_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, + uint32_t *value) +{ + struct smu_table_context *smu_table= &smu->smu_table; + SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; + int ret = 0; + + mutex_lock(&smu->metrics_lock); + + if (!smu_table->metrics_time || + time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(1))) { + ret = smu_update_table(smu, + SMU_TABLE_SMU_METRICS, + 0, + smu_table->metrics_table, + false); + if (ret) { + dev_info(smu->adev->dev, "Failed to export SMU metrics table!\n"); + mutex_unlock(&smu->metrics_lock); + return ret; + } + smu_table->metrics_time = jiffies; + } + + switch (member) { + case METRICS_CURR_GFXCLK: + 
*value = metrics->CurrClock[PPCLK_GFXCLK]; + break; + case METRICS_CURR_SOCCLK: + *value = metrics->CurrClock[PPCLK_SOCCLK]; + break; + case METRICS_CURR_UCLK: + *value = metrics->CurrClock[PPCLK_UCLK]; + break; + case METRICS_CURR_VCLK: + *value = metrics->CurrClock[PPCLK_VCLK]; + break; + case METRICS_CURR_DCLK: + *value = metrics->CurrClock[PPCLK_DCLK]; + break; + case METRICS_CURR_FCLK: + *value = metrics->CurrClock[PPCLK_FCLK]; + break; + case METRICS_AVERAGE_GFXCLK: + *value = metrics->AverageGfxclkFrequency; + break; + case METRICS_AVERAGE_SOCCLK: + *value = metrics->AverageSocclkFrequency; + break; + case METRICS_AVERAGE_UCLK: + *value = metrics->AverageUclkFrequency; + break; + case METRICS_AVERAGE_VCLK: + *value = metrics->AverageVclkFrequency; + break; + case METRICS_AVERAGE_DCLK: + *value = metrics->AverageDclkFrequency; + break; + case METRICS_AVERAGE_GFXACTIVITY: + *value = metrics->AverageGfxActivity; + break; + case METRICS_AVERAGE_MEMACTIVITY: + *value = metrics->AverageUclkActivity; + break; + case METRICS_AVERAGE_VCNACTIVITY: + *value = metrics->VcnActivityPercentage; + break; + case METRICS_AVERAGE_SOCKETPOWER: + *value = metrics->AverageSocketPower << 8; + break; + case METRICS_TEMPERATURE_EDGE: + *value = metrics->TemperatureEdge * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_HOTSPOT: + *value = metrics->TemperatureHotspot * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_MEM: + *value = metrics->TemperatureHBM * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_VRGFX: + *value = metrics->TemperatureVrGfx * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_VRSOC: + *value = metrics->TemperatureVrSoc * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_VRMEM: + *value = metrics->TemperatureVrMem * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_THROTTLER_STATUS: + *value = metrics->ThrottlerStatus; + break; + case METRICS_CURR_FANSPEED: + *value = metrics->CurrFanSpeed; + break; + default: + *value = UINT_MAX; + break; + } + + mutex_unlock(&smu->metrics_lock); + + return ret; +} + +static int arcturus_get_current_clk_freq_by_table(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *value) +{ + MetricsMember_t member_type; + int clk_id = 0; + + if (!value) + return -EINVAL; + + clk_id = smu_clk_get_index(smu, clk_type); + if (clk_id < 0) + return -EINVAL; + + switch (clk_id) { + case PPCLK_GFXCLK: + /* + * CurrClock[clk_id] can provide accurate + * output only when the dpm feature is enabled. + * We can use Average_* for dpm disabled case. + * But this is available for gfxclk/uclk/socclk/vclk/dclk. 
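arcturus_get_smu_metrics_data() above rate-limits SMU traffic: under metrics_lock it re-exports the metrics table only when the cached copy is more than a millisecond old (metrics_time compared against jiffies), and every METRICS_* accessor is then served from the cache. The same stamped-cache pattern reduced to stand-alone C, with a plain counter standing in for jiffies and a canned value for the table export (all names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for jiffies and the expensive SMU table export. */
static unsigned long fake_jiffies;
static int table_exports;

struct metrics_cache {
    unsigned long stamp;   /* 0 = never filled, like metrics_time */
    uint32_t gfxclk_mhz;
};

static void export_metrics(struct metrics_cache *c)
{
    table_exports++;       /* models smu_update_table() */
    c->gfxclk_mhz = 1500;
    c->stamp = fake_jiffies;
}

/* Serve from the cache unless it is older than max_age ticks. */
static uint32_t get_gfxclk(struct metrics_cache *c, unsigned long max_age)
{
    if (!c->stamp || fake_jiffies > c->stamp + max_age)
        export_metrics(c);
    return c->gfxclk_mhz;
}

int main(void)
{
    struct metrics_cache c = { 0 };

    for (fake_jiffies = 1; fake_jiffies <= 10; fake_jiffies++)
        get_gfxclk(&c, 3);
    printf("10 reads, %d table exports\n", table_exports);  /* 3 */
    return 0;
}

Because many sysfs sensors fan out to the same table, the stamp turns a burst of reads into a single SMU round trip, which is the whole point of routing every member through one accessor.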
+ */ + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) + member_type = METRICS_CURR_GFXCLK; + else + member_type = METRICS_AVERAGE_GFXCLK; + break; + case PPCLK_UCLK: + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) + member_type = METRICS_CURR_UCLK; + else + member_type = METRICS_AVERAGE_UCLK; + break; + case PPCLK_SOCCLK: + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) + member_type = METRICS_CURR_SOCCLK; + else + member_type = METRICS_AVERAGE_SOCCLK; + break; + case PPCLK_VCLK: + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) + member_type = METRICS_CURR_VCLK; + else + member_type = METRICS_AVERAGE_VCLK; + break; + case PPCLK_DCLK: + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) + member_type = METRICS_CURR_DCLK; + else + member_type = METRICS_AVERAGE_DCLK; + break; + case PPCLK_FCLK: + member_type = METRICS_CURR_FCLK; + break; + default: + return -EINVAL; + } + + return arcturus_get_smu_metrics_data(smu, + member_type, + value); +} + static int arcturus_print_clk_levels(struct smu_context *smu, enum smu_clk_type type, char *buf) { int i, now, size = 0; int ret = 0; struct pp_clock_levels_with_latency clocks; - struct arcturus_single_dpm_table *single_dpm_table; + struct smu_11_0_dpm_table *single_dpm_table; struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct arcturus_dpm_table *dpm_table = NULL; + struct smu_11_0_dpm_context *dpm_context = NULL; if (amdgpu_ras_intr_triggered()) return snprintf(buf, PAGE_SIZE, "unavailable\n"); - dpm_table = smu_dpm->dpm_context; + dpm_context = smu_dpm->dpm_context; switch (type) { case SMU_SCLK: - ret = smu_get_current_clk_freq(smu, SMU_GFXCLK, &now); + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_GFXCLK, &now); if (ret) { dev_err(smu->adev->dev, "Attempt to get current gfx clk Failed!"); return ret; } - single_dpm_table = &(dpm_table->gfx_table); + single_dpm_table = &(dpm_context->dpm_tables.gfx_table); ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table); if (ret) { dev_err(smu->adev->dev, "Attempt to get gfx clk levels Failed!"); @@ -684,17 +823,17 @@ static int arcturus_print_clk_levels(struct smu_context *smu, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now) ? "*" : "")); break; case SMU_MCLK: - ret = smu_get_current_clk_freq(smu, SMU_UCLK, &now); + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_UCLK, &now); if (ret) { dev_err(smu->adev->dev, "Attempt to get current mclk Failed!"); return ret; } - single_dpm_table = &(dpm_table->mem_table); + single_dpm_table = &(dpm_context->dpm_tables.uclk_table); ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table); if (ret) { dev_err(smu->adev->dev, "Attempt to get memory clk levels Failed!"); @@ -707,17 +846,17 @@ static int arcturus_print_clk_levels(struct smu_context *smu, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now) ? 
"*" : "")); break; case SMU_SOCCLK: - ret = smu_get_current_clk_freq(smu, SMU_SOCCLK, &now); + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_SOCCLK, &now); if (ret) { dev_err(smu->adev->dev, "Attempt to get current socclk Failed!"); return ret; } - single_dpm_table = &(dpm_table->soc_table); + single_dpm_table = &(dpm_context->dpm_tables.soc_table); ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table); if (ret) { dev_err(smu->adev->dev, "Attempt to get socclk levels Failed!"); @@ -730,17 +869,17 @@ static int arcturus_print_clk_levels(struct smu_context *smu, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now) ? "*" : "")); break; case SMU_FCLK: - ret = smu_get_current_clk_freq(smu, SMU_FCLK, &now); + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_FCLK, &now); if (ret) { dev_err(smu->adev->dev, "Attempt to get current fclk Failed!"); return ret; } - single_dpm_table = &(dpm_table->fclk_table); + single_dpm_table = &(dpm_context->dpm_tables.fclk_table); ret = arcturus_get_clk_table(smu, &clocks, single_dpm_table); if (ret) { dev_err(smu->adev->dev, "Attempt to get fclk levels Failed!"); @@ -753,7 +892,7 @@ static int arcturus_print_clk_levels(struct smu_context *smu, (clocks.num_levels == 1) ? "*" : (arcturus_freqs_in_same_level( clocks.data[i].clocks_in_khz / 1000, - now / 100) ? "*" : "")); + now) ? "*" : "")); break; default: @@ -763,20 +902,19 @@ static int arcturus_print_clk_levels(struct smu_context *smu, return size; } -static int arcturus_upload_dpm_level(struct smu_context *smu, bool max, - uint32_t feature_mask) +static int arcturus_upload_dpm_level(struct smu_context *smu, + bool max, + uint32_t feature_mask, + uint32_t level) { - struct arcturus_single_dpm_table *single_dpm_table; - struct arcturus_dpm_table *dpm_table = + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; uint32_t freq; int ret = 0; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && (feature_mask & FEATURE_DPM_GFXCLK_MASK)) { - single_dpm_table = &(dpm_table->gfx_table); - freq = max ? single_dpm_table->dpm_state.soft_max_level : - single_dpm_table->dpm_state.soft_min_level; + freq = dpm_context->dpm_tables.gfx_table.dpm_levels[level].value; ret = smu_send_smc_msg_with_param(smu, (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), (PPCLK_GFXCLK << 16) | (freq & 0xffff), @@ -790,9 +928,7 @@ static int arcturus_upload_dpm_level(struct smu_context *smu, bool max, if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) && (feature_mask & FEATURE_DPM_UCLK_MASK)) { - single_dpm_table = &(dpm_table->mem_table); - freq = max ? single_dpm_table->dpm_state.soft_max_level : - single_dpm_table->dpm_state.soft_min_level; + freq = dpm_context->dpm_tables.uclk_table.dpm_levels[level].value; ret = smu_send_smc_msg_with_param(smu, (max ? SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), (PPCLK_UCLK << 16) | (freq & 0xffff), @@ -806,9 +942,7 @@ static int arcturus_upload_dpm_level(struct smu_context *smu, bool max, if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT) && (feature_mask & FEATURE_DPM_SOCCLK_MASK)) { - single_dpm_table = &(dpm_table->soc_table); - freq = max ? single_dpm_table->dpm_state.soft_max_level : - single_dpm_table->dpm_state.soft_min_level; + freq = dpm_context->dpm_tables.soc_table.dpm_levels[level].value; ret = smu_send_smc_msg_with_param(smu, (max ? 
SMU_MSG_SetSoftMaxByFreq : SMU_MSG_SetSoftMinByFreq), (PPCLK_SOCCLK << 16) | (freq & 0xffff), @@ -826,8 +960,8 @@ static int arcturus_upload_dpm_level(struct smu_context *smu, bool max, static int arcturus_force_clk_levels(struct smu_context *smu, enum smu_clk_type type, uint32_t mask) { - struct arcturus_dpm_table *dpm_table; - struct arcturus_single_dpm_table *single_dpm_table; + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_11_0_dpm_table *single_dpm_table = NULL; uint32_t soft_min_level, soft_max_level; uint32_t smu_version; int ret = 0; @@ -847,12 +981,9 @@ static int arcturus_force_clk_levels(struct smu_context *smu, soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? (fls(mask) - 1) : 0; - dpm_table = smu->smu_dpm.dpm_context; - switch (type) { case SMU_SCLK: - single_dpm_table = &(dpm_table->gfx_table); - + single_dpm_table = &(dpm_context->dpm_tables.gfx_table); if (soft_max_level >= single_dpm_table->count) { dev_err(smu->adev->dev, "Clock level specified %d is over max allowed %d\n", soft_max_level, single_dpm_table->count - 1); @@ -860,18 +991,19 @@ static int arcturus_force_clk_levels(struct smu_context *smu, break; } - single_dpm_table->dpm_state.soft_min_level = - single_dpm_table->dpm_levels[soft_min_level].value; - single_dpm_table->dpm_state.soft_max_level = - single_dpm_table->dpm_levels[soft_max_level].value; - - ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); + ret = arcturus_upload_dpm_level(smu, + false, + FEATURE_DPM_GFXCLK_MASK, + soft_min_level); if (ret) { dev_err(smu->adev->dev, "Failed to upload boot level to lowest!\n"); break; } - ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); + ret = arcturus_upload_dpm_level(smu, + true, + FEATURE_DPM_GFXCLK_MASK, + soft_max_level); if (ret) dev_err(smu->adev->dev, "Failed to upload dpm max level to highest!\n"); @@ -897,11 +1029,16 @@ static int arcturus_force_clk_levels(struct smu_context *smu, static int arcturus_get_thermal_temperature_range(struct smu_context *smu, struct smu_temperature_range *range) { + struct smu_table_context *table_context = &smu->smu_table; + struct smu_11_0_powerplay_table *powerplay_table = + table_context->power_play_table; PPTable_t *pptable = smu->smu_table.driver_pptable; if (!range) return -EINVAL; + memcpy(range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range)); + range->max = pptable->TedgeLimit * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * @@ -912,123 +1049,13 @@ static int arcturus_get_thermal_temperature_range(struct smu_context *smu, SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->mem_crit_max = pptable->TmemLimit * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - range->mem_emergency_max = (pptable->TmemLimit + CTF_OFFSET_HBM)* + range->mem_emergency_max = (pptable->TmemLimit + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->software_shutdown_temp = powerplay_table->software_shutdown_temp; return 0; } -static int arcturus_get_smu_metrics_data(struct smu_context *smu, - MetricsMember_t member, - uint32_t *value) -{ - struct smu_table_context *smu_table= &smu->smu_table; - SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; - int ret = 0; - - mutex_lock(&smu->metrics_lock); - - if (!smu_table->metrics_time || - time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(1))) { - ret = smu_update_table(smu, - SMU_TABLE_SMU_METRICS, - 0, - smu_table->metrics_table, - false); - if (ret) { - 
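In arcturus_force_clk_levels() above, the sysfs level bitmask is collapsed to a [soft_min, soft_max] pair with ffs()/fls(): the lowest set bit selects the soft-min DPM level and the highest set bit the soft-max, which then feed arcturus_upload_dpm_level(). A stand-alone check of that conversion, with GCC builtins standing in for the kernel's ffs()/fls():

#include <stdint.h>
#include <stdio.h>

/* Kernel ffs(): 1-based index of the lowest set bit; fls(): highest. */
static int my_ffs(uint32_t x) { return x ? __builtin_ctz(x) + 1 : 0; }
static int my_fls(uint32_t x) { return x ? 32 - __builtin_clz(x) : 0; }

int main(void)
{
    uint32_t mask = 0x06;  /* user forced DPM levels 1 and 2 */
    uint32_t lo = mask ? my_ffs(mask) - 1 : 0;
    uint32_t hi = mask ? my_fls(mask) - 1 : 0;

    printf("soft_min_level=%u soft_max_level=%u\n", lo, hi);  /* 1 2 */
    return 0;
}

Note that any gaps inside the mask are ignored: only the extremes matter, since the SMU is programmed with a min/max frequency pair, not a per-level enable list.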
dev_info(smu->adev->dev, "Failed to export SMU metrics table!\n"); - mutex_unlock(&smu->metrics_lock); - return ret; - } - smu_table->metrics_time = jiffies; - } - - switch (member) { - case METRICS_CURR_GFXCLK: - *value = metrics->CurrClock[PPCLK_GFXCLK]; - break; - case METRICS_CURR_SOCCLK: - *value = metrics->CurrClock[PPCLK_SOCCLK]; - break; - case METRICS_CURR_UCLK: - *value = metrics->CurrClock[PPCLK_UCLK]; - break; - case METRICS_CURR_VCLK: - *value = metrics->CurrClock[PPCLK_VCLK]; - break; - case METRICS_CURR_DCLK: - *value = metrics->CurrClock[PPCLK_DCLK]; - break; - case METRICS_CURR_FCLK: - *value = metrics->CurrClock[PPCLK_FCLK]; - break; - case METRICS_AVERAGE_GFXCLK: - *value = metrics->AverageGfxclkFrequency; - break; - case METRICS_AVERAGE_SOCCLK: - *value = metrics->AverageSocclkFrequency; - break; - case METRICS_AVERAGE_UCLK: - *value = metrics->AverageUclkFrequency; - break; - case METRICS_AVERAGE_VCLK: - *value = metrics->AverageVclkFrequency; - break; - case METRICS_AVERAGE_DCLK: - *value = metrics->AverageDclkFrequency; - break; - case METRICS_AVERAGE_GFXACTIVITY: - *value = metrics->AverageGfxActivity; - break; - case METRICS_AVERAGE_MEMACTIVITY: - *value = metrics->AverageUclkActivity; - break; - case METRICS_AVERAGE_VCNACTIVITY: - *value = metrics->VcnActivityPercentage; - break; - case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->AverageSocketPower << 8; - break; - case METRICS_TEMPERATURE_EDGE: - *value = metrics->TemperatureEdge * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_HOTSPOT: - *value = metrics->TemperatureHotspot * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_MEM: - *value = metrics->TemperatureHBM * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_VRGFX: - *value = metrics->TemperatureVrGfx * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_VRSOC: - *value = metrics->TemperatureVrSoc * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_TEMPERATURE_VRMEM: - *value = metrics->TemperatureVrMem * - SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; - break; - case METRICS_THROTTLER_STATUS: - *value = metrics->ThrottlerStatus; - break; - case METRICS_CURR_FANSPEED: - *value = metrics->CurrFanSpeed; - break; - default: - *value = UINT_MAX; - break; - } - - mutex_unlock(&smu->metrics_lock); - - return ret; -} - static int arcturus_get_current_activity_percent(struct smu_context *smu, enum amd_pp_sensors sensor, uint32_t *value) @@ -1138,8 +1165,24 @@ static int arcturus_read_sensor(struct smu_context *smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data); + /* the output clock frequency in 10K unit */ + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = arcturus_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data); + *size = 4; + break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = -EOPNOTSUPP; + break; } mutex_unlock(&smu->sensor_lock); @@ -1177,238 +1220,6 @@ static int arcturus_get_fan_speed_percent(struct smu_context *smu, return ret; } -static int arcturus_get_current_clk_freq_by_table(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *value) -{ - MetricsMember_t member_type; - int clk_id = 0; - - if 
(!value) - return -EINVAL; - - clk_id = smu_clk_get_index(smu, clk_type); - if (clk_id < 0) - return -EINVAL; - - switch (clk_id) { - case PPCLK_GFXCLK: - /* - * CurrClock[clk_id] can provide accurate - * output only when the dpm feature is enabled. - * We can use Average_* for dpm disabled case. - * But this is available for gfxclk/uclk/socclk/vclk/dclk. - */ - if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) - member_type = METRICS_CURR_GFXCLK; - else - member_type = METRICS_AVERAGE_GFXCLK; - break; - case PPCLK_UCLK: - if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) - member_type = METRICS_CURR_UCLK; - else - member_type = METRICS_AVERAGE_UCLK; - break; - case PPCLK_SOCCLK: - if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) - member_type = METRICS_CURR_SOCCLK; - else - member_type = METRICS_AVERAGE_SOCCLK; - break; - case PPCLK_VCLK: - if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) - member_type = METRICS_CURR_VCLK; - else - member_type = METRICS_AVERAGE_VCLK; - break; - case PPCLK_DCLK: - if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) - member_type = METRICS_CURR_DCLK; - else - member_type = METRICS_AVERAGE_DCLK; - break; - case PPCLK_FCLK: - member_type = METRICS_CURR_FCLK; - break; - default: - return -EINVAL; - } - - return arcturus_get_smu_metrics_data(smu, - member_type, - value); -} - -static uint32_t arcturus_find_lowest_dpm_level(struct arcturus_single_dpm_table *table) -{ - uint32_t i; - - for (i = 0; i < table->count; i++) { - if (table->dpm_levels[i].enabled) - break; - } - if (i >= table->count) { - i = 0; - table->dpm_levels[i].enabled = true; - } - - return i; -} - -static uint32_t arcturus_find_highest_dpm_level(struct smu_context *smu, - struct arcturus_single_dpm_table *table) -{ - int i = 0; - - if (table->count <= 0) { - dev_err(smu->adev->dev, "[%s] DPM Table has no entry!", __func__); - return 0; - } - if (table->count > MAX_DPM_NUMBER) { - dev_err(smu->adev->dev, "[%s] DPM Table has too many entries!", __func__); - return MAX_DPM_NUMBER - 1; - } - - for (i = table->count - 1; i >= 0; i--) { - if (table->dpm_levels[i].enabled) - break; - } - if (i < 0) { - i = 0; - table->dpm_levels[i].enabled = true; - } - - return i; -} - - - -static int arcturus_force_dpm_limit_value(struct smu_context *smu, bool highest) -{ - struct arcturus_dpm_table *dpm_table = - (struct arcturus_dpm_table *)smu->smu_dpm.dpm_context; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0); - uint32_t soft_level; - int ret = 0; - - /* gfxclk */ - if (highest) - soft_level = arcturus_find_highest_dpm_level(smu, &(dpm_table->gfx_table)); - else - soft_level = arcturus_find_lowest_dpm_level(&(dpm_table->gfx_table)); - - dpm_table->gfx_table.dpm_state.soft_min_level = - dpm_table->gfx_table.dpm_state.soft_max_level = - dpm_table->gfx_table.dpm_levels[soft_level].value; - - ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); - if (ret) { - dev_err(smu->adev->dev, "Failed to upload boot level to %s!\n", - highest ? "highest" : "lowest"); - return ret; - } - - ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); - if (ret) { - dev_err(smu->adev->dev, "Failed to upload dpm max level to %s!\n!", - highest ? "highest" : "lowest"); - return ret; - } - - if (hive) - /* - * Force XGMI Pstate to highest or lowest - * TODO: revise this when xgmi dpm is functional - */ - ret = smu_v11_0_set_xgmi_pstate(smu, highest ? 
1 : 0); - - return ret; -} - -static int arcturus_unforce_dpm_levels(struct smu_context *smu) -{ - struct arcturus_dpm_table *dpm_table = - (struct arcturus_dpm_table *)smu->smu_dpm.dpm_context; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(smu->adev, 0); - uint32_t soft_min_level, soft_max_level; - int ret = 0; - - /* gfxclk */ - soft_min_level = arcturus_find_lowest_dpm_level(&(dpm_table->gfx_table)); - soft_max_level = arcturus_find_highest_dpm_level(smu, &(dpm_table->gfx_table)); - dpm_table->gfx_table.dpm_state.soft_min_level = - dpm_table->gfx_table.dpm_levels[soft_min_level].value; - dpm_table->gfx_table.dpm_state.soft_max_level = - dpm_table->gfx_table.dpm_levels[soft_max_level].value; - - ret = arcturus_upload_dpm_level(smu, false, FEATURE_DPM_GFXCLK_MASK); - if (ret) { - dev_err(smu->adev->dev, "Failed to upload DPM Bootup Levels!"); - return ret; - } - - ret = arcturus_upload_dpm_level(smu, true, FEATURE_DPM_GFXCLK_MASK); - if (ret) { - dev_err(smu->adev->dev, "Failed to upload DPM Max Levels!"); - return ret; - } - - if (hive) - /* - * Reset XGMI Pstate back to default - * TODO: revise this when xgmi dpm is functional - */ - ret = smu_v11_0_set_xgmi_pstate(smu, 0); - - return ret; -} - -static int -arcturus_get_profiling_clk_mask(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask) -{ - struct arcturus_dpm_table *dpm_table = - (struct arcturus_dpm_table *)smu->smu_dpm.dpm_context; - struct arcturus_single_dpm_table *gfx_dpm_table; - struct arcturus_single_dpm_table *mem_dpm_table; - struct arcturus_single_dpm_table *soc_dpm_table; - - if (!smu->smu_dpm.dpm_context) - return -EINVAL; - - gfx_dpm_table = &dpm_table->gfx_table; - mem_dpm_table = &dpm_table->mem_table; - soc_dpm_table = &dpm_table->soc_table; - - *sclk_mask = 0; - *mclk_mask = 0; - *soc_mask = 0; - - if (gfx_dpm_table->count > ARCTURUS_UMD_PSTATE_GFXCLK_LEVEL && - mem_dpm_table->count > ARCTURUS_UMD_PSTATE_MCLK_LEVEL && - soc_dpm_table->count > ARCTURUS_UMD_PSTATE_SOCCLK_LEVEL) { - *sclk_mask = ARCTURUS_UMD_PSTATE_GFXCLK_LEVEL; - *mclk_mask = ARCTURUS_UMD_PSTATE_MCLK_LEVEL; - *soc_mask = ARCTURUS_UMD_PSTATE_SOCCLK_LEVEL; - } - - if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { - *sclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { - *mclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { - *sclk_mask = gfx_dpm_table->count - 1; - *mclk_mask = mem_dpm_table->count - 1; - *soc_mask = soc_dpm_table->count - 1; - } - - return 0; -} - static int arcturus_get_power_limit(struct smu_context *smu) { struct smu_11_0_powerplay_table *powerplay_table = @@ -2136,7 +1947,6 @@ static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) return ret; } - static void arcturus_fill_eeprom_i2c_req(SwI2cRequest_t *req, bool write, uint8_t address, uint32_t numbytes, uint8_t *data) @@ -2504,37 +2314,6 @@ static void arcturus_log_thermal_throttling_event(struct smu_context *smu) log_buf); } -static int arcturus_set_thermal_range(struct smu_context *smu, - struct smu_temperature_range range) -{ - struct amdgpu_device *adev = smu->adev; - int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; - int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; - uint32_t val; - struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; - - low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, - range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = 
min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); - - if (low > high) - return -EINVAL; - - val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); - val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); - - WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); - - return 0; -} - static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2554,15 +2333,11 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .set_default_dpm_table = arcturus_set_default_dpm_table, .populate_umd_state_clk = arcturus_populate_umd_state_clk, .get_thermal_temperature_range = arcturus_get_thermal_temperature_range, - .get_current_clk_freq_by_table = arcturus_get_current_clk_freq_by_table, .print_clk_levels = arcturus_print_clk_levels, .force_clk_levels = arcturus_force_clk_levels, .read_sensor = arcturus_read_sensor, .get_fan_speed_percent = arcturus_get_fan_speed_percent, .get_fan_speed_rpm = arcturus_get_fan_speed_rpm, - .force_dpm_limit_value = arcturus_force_dpm_limit_value, - .unforce_dpm_levels = arcturus_unforce_dpm_levels, - .get_profiling_clk_mask = arcturus_get_profiling_clk_mask, .get_power_profile_mode = arcturus_get_power_profile_mode, .set_power_profile_mode = arcturus_set_power_profile_mode, .set_performance_level = arcturus_set_performance_level, @@ -2597,7 +2372,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_enabled_mask = smu_v11_0_get_enabled_mask, .notify_display_change = NULL, .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, .enable_thermal_alert = smu_v11_0_enable_thermal_alert, .disable_thermal_alert = smu_v11_0_disable_thermal_alert, @@ -2619,11 +2393,9 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .baco_exit = smu_v11_0_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, - .override_pcie_parameters = NULL, .set_df_cstate = arcturus_set_df_cstate, .allow_xgmi_power_down = arcturus_allow_xgmi_power_down, .log_thermal_throttling_event = arcturus_log_thermal_throttling_event, - .set_thermal_range = arcturus_set_thermal_range, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 7b349e038972..70181ba7ee0c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -119,6 +119,7 @@ struct smu_temperature_range { int mem_min; int mem_crit_max; int mem_emergency_max; + int software_shutdown_temp; }; struct smu_state_validation_block { @@ -145,7 +146,6 @@ struct smu_power_state { struct smu_state_pcie_block pcie; struct smu_state_display_block display; struct smu_state_memroy_block memory; - struct smu_temperature_range temperatures; struct smu_state_software_algorithm_block software; struct smu_uvd_clocks 
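/*
 * Worked example for the clamping in the set_thermal_range helpers
 * being removed here: the range comes in millidegrees C
 * (SMU_TEMPERATURE_UNITS_PER_CENTIGRADES == 1000) and the trip points
 * are 8-bit degree values. Taking an assumed 100 C shutdown limit:
 *
 *   low  = max(0,   -273150 / 1000) = 0	(boot-default range.min)
 *   high = min(255, 100)            = 100	(software_shutdown_temp)
 *
 * low > high would have returned -EINVAL before touching
 * mmTHM_THERMAL_INT_CTRL.
 */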
uvd_clocks; struct smu_hw_power_state hardware; @@ -352,6 +352,20 @@ struct smu_baco_context bool platform_support; }; +struct pstates_clk_freq { + uint32_t min; + uint32_t standard; + uint32_t peak; +}; + +struct smu_umd_pstate_table { + struct pstates_clk_freq gfxclk_pstate; + struct pstates_clk_freq socclk_pstate; + struct pstates_clk_freq uclk_pstate; + struct pstates_clk_freq vclk_pstate; + struct pstates_clk_freq dclk_pstate; +}; + #define WORKLOAD_POLICY_MAX 7 struct smu_context { @@ -371,11 +385,13 @@ struct smu_context struct smu_feature smu_feature; struct amd_pp_display_configuration *display_config; struct smu_baco_context smu_baco; + struct smu_temperature_range thermal_range; void *od_settings; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_sclk; #endif + struct smu_umd_pstate_table pstate_table; uint32_t pstate_sclk; uint32_t pstate_mclk; @@ -461,24 +477,13 @@ struct pptable_funcs { int (*display_config_changed)(struct smu_context *smu); int (*apply_clocks_adjust_rules)(struct smu_context *smu); int (*notify_smc_display_config)(struct smu_context *smu); - int (*force_dpm_limit_value)(struct smu_context *smu, bool highest); - int (*unforce_dpm_levels)(struct smu_context *smu); - int (*get_profiling_clk_mask)(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask); int (*set_cpu_power_state)(struct smu_context *smu); bool (*is_dpm_running)(struct smu_context *smu); int (*tables_init)(struct smu_context *smu, struct smu_table *tables); - int (*set_thermal_fan_table)(struct smu_context *smu); int (*get_fan_speed_percent)(struct smu_context *smu, uint32_t *speed); int (*get_fan_speed_rpm)(struct smu_context *smu, uint32_t *speed); int (*set_watermarks_table)(struct smu_context *smu, void *watermarks, struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges); - int (*get_current_clk_freq_by_table)(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t *value); int (*get_thermal_temperature_range)(struct smu_context *smu, struct smu_temperature_range *range); int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states); int (*set_default_od_settings)(struct smu_context *smu); @@ -486,8 +491,6 @@ struct pptable_funcs { int (*display_disable_memory_clock_switch)(struct smu_context *smu, bool disable_memory_clock_switch); void (*dump_pptable)(struct smu_context *smu); int (*get_power_limit)(struct smu_context *smu); - int (*get_dpm_clk_limited)(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t dpm_level, uint32_t *freq); int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state); int (*allow_xgmi_power_down)(struct smu_context *smu, bool en); int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap); @@ -521,7 +524,6 @@ struct pptable_funcs { int (*get_enabled_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); int (*notify_display_change)(struct smu_context *smu); int (*set_power_limit)(struct smu_context *smu, uint32_t n); - int (*get_current_clk_freq)(struct smu_context *smu, enum smu_clk_type clk_id, uint32_t *value); int (*init_max_sustainable_clocks)(struct smu_context *smu); int (*enable_thermal_alert)(struct smu_context *smu); int (*disable_thermal_alert)(struct smu_context *smu); @@ -561,14 +563,14 @@ struct pptable_funcs { int (*baco_set_state)(struct smu_context *smu, enum smu_baco_state state); int (*baco_enter)(struct smu_context *smu); int 
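/*
 * Sketch (hypothetical helper, not in the patch): with the new
 * smu->pstate_table, a PROFILE_STANDARD request becomes a plain table
 * lookup instead of per-ASIC mask arithmetic -- presumably what the
 * common smu_v11_0_set_performance_level wired up later in this diff
 * relies on.
 */
static int smu_standard_gfxclk_example(struct smu_context *smu)
{
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;
	uint32_t std = pstate_table->gfxclk_pstate.standard;

	return smu_v11_0_set_soft_freq_limited_range(smu, SMU_GFXCLK, std, std);
}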
(*baco_exit)(struct smu_context *smu); + bool (*mode1_reset_is_support)(struct smu_context *smu); + int (*mode1_reset)(struct smu_context *smu); int (*mode2_reset)(struct smu_context *smu); int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); - int (*override_pcie_parameters)(struct smu_context *smu); int (*disable_umc_cdr_12gbps_workaround)(struct smu_context *smu); int (*set_power_source)(struct smu_context *smu, enum smu_power_src_type power_src); void (*log_thermal_throttling_event)(struct smu_context *smu); - int (*set_thermal_range)(struct smu_context *smu, struct smu_temperature_range range); }; typedef enum { @@ -672,6 +674,8 @@ int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state); int smu_baco_enter(struct smu_context *smu); int smu_baco_exit(struct smu_context *smu); +bool smu_mode1_reset_is_support(struct smu_context *smu); +int smu_mode1_reset(struct smu_context *smu); int smu_mode2_reset(struct smu_context *smu); extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, @@ -719,18 +723,10 @@ int smu_switch_power_profile(struct smu_context *smu, enum PP_SMC_POWER_PROFILE type, bool en); int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version); -int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_type, - uint16_t level, uint32_t *value); -int smu_get_dpm_level_count(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *value); int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max, bool lock_needed); + uint32_t *min, uint32_t *max); int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max, bool lock_needed); -int smu_set_hard_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); -int smu_get_dpm_level_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min_value, uint32_t *max_value); enum amd_dpm_forced_level smu_get_performance_level(struct smu_context *smu); int smu_force_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level); int smu_set_display_count(struct smu_context *smu, uint32_t count); @@ -742,8 +738,7 @@ size_t smu_sys_get_pp_feature_mask(struct smu_context *smu, char *buf); int smu_sys_set_pp_feature_mask(struct smu_context *smu, uint64_t new_mask); int smu_force_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t mask, - bool lock_needed); + uint32_t mask); int smu_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); int smu_set_df_cstate(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h index 5322f6da3071..b2232e24d82f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_sienna_cichlid.h @@ -27,9 +27,9 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU11_DRIVER_IF_VERSION 0x31 +#define SMU11_DRIVER_IF_VERSION 0x33 -#define PPTABLE_Sienna_Cichlid_SMU_VERSION 4 +#define PPTABLE_Sienna_Cichlid_SMU_VERSION 5 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SMNCLK_DPM_LEVELS 2 @@ 
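/*
 * The smu_mode1_reset*() wrapper bodies live in amdgpu_smu.c and are
 * not shown in this hunk; a plausible shape, following the existing
 * baco wrappers (sketch only):
 */
bool smu_mode1_reset_is_support(struct smu_context *smu)
{
	if (!smu->pm_enabled || !smu->ppt_funcs ||
	    !smu->ppt_funcs->mode1_reset_is_support)
		return false;

	return smu->ppt_funcs->mode1_reset_is_support(smu);
}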
-128,7 +128,7 @@ #define FEATURE_2_STEP_PSTATE_BIT 46 #define FEATURE_SMNCLK_DPM_BIT 47 #define FEATURE_SPARE_48_BIT 48 -#define FEATURE_SPARE_49_BIT 49 +#define FEATURE_GFX_EDC_BIT 49 #define FEATURE_SPARE_50_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 @@ -564,6 +564,12 @@ typedef enum { TDC_THROTTLER_COUNT } TDC_THROTTLER_e; +typedef enum { + CUSTOMER_VARIANT_ROW, + CUSTOMER_VARIANT_FALCON, + CUSTOMER_VARIANT_COUNT, +} CUSTOMER_VARIANT_e; + // Used for 2-step UCLK DPM change workaround typedef struct { uint16_t Fmin; @@ -786,7 +792,10 @@ typedef struct { QuadraticInt_t ReservedEquation3; // SECTION: Sku Reserved - uint32_t SkuReserved[15]; + uint8_t CustomerVariant; + uint8_t Spare[3]; + uint32_t SkuReserved[14]; + // MAJOR SECTION: BOARD PARAMETERS @@ -865,8 +874,7 @@ typedef struct { uint16_t DfllGfxclkSpreadFreq; // kHz // UCLK Spread Spectrum - uint8_t UclkSpreadEnabled; // on or off - uint8_t UclkSpreadPercent; // Q4.4 + uint16_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // FCLK Spread Spectrum @@ -896,8 +904,11 @@ typedef struct { uint8_t VddqOffEnabled; uint8_t PaddingUmcFlags[2]; + // UCLK Spread Spectrum + uint8_t UclkSpreadPercent[16]; + // SECTION: Board Reserved - uint32_t BoardReserved[15]; + uint32_t BoardReserved[11]; // SECTION: Structure Padding @@ -936,10 +947,12 @@ typedef struct { int16_t OverDrivePct; // % uint16_t FanMaximumRpm; uint16_t FanMinimumPwm; + uint16_t FanAcousticLimitRpm; uint16_t FanTargetTemperature; // Degree Celcius uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS]; uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS]; uint16_t MaxOpTemp; // Degree Celcius + uint16_t Padding_16[1]; uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h index dff2295705be..7b585e205a5a 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h @@ -173,6 +173,7 @@ __SMU_DUMMY_MAP(GmiPwrDnControl), \ __SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \ __SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \ + __SMU_DUMMY_MAP(Mode1Reset), \ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index 4fb911d8b49c..f06158511f93 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -30,7 +30,8 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x31 +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x33 +#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x2B /* MP Apertures */ #define MP0_Public 0x03800000 @@ -48,6 +49,7 @@ #define SMU11_TOOL_SIZE 0x19000 +#define MAX_DPM_LEVELS 16 #define MAX_PCIE_CONF 2 #define CLK_MAP(clk, index) \ @@ -65,6 +67,10 @@ #define WORKLOAD_MAP(profile, workload) \ [profile] = {1, (workload)} +#define CTF_OFFSET_EDGE 5 +#define CTF_OFFSET_HOTSPOT 5 +#define CTF_OFFSET_MEM 5 + static const struct smu_temperature_range smu11_thermal_policy[] = { {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, @@ -91,9 +97,17 @@ struct smu_11_0_max_sustainable_clocks { uint32_t soc_clock; }; +struct smu_11_0_dpm_clk_level { + bool enabled; + uint32_t value; +}; + struct smu_11_0_dpm_table { - uint32_t min; /* MHz */ - uint32_t max; /* MHz */ + 
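/*
 * Note on the pptable layout changes above: new fields are carved out
 * of the reserved arrays so the structure size (and thus the firmware
 * ABI) is unchanged, and the interface version is bumped (0x31 -> 0x33)
 * so a driver/firmware mismatch can be detected. Illustrative size
 * checks, not in the patch:
 */
static void sienna_cichlid_pptable_layout_checks(void)
{
	/* CustomerVariant + Spare[3] + SkuReserved[14] == old SkuReserved[15] */
	BUILD_BUG_ON(1 + 3 + 14 * 4 != 15 * 4);
	/* UclkSpreadPercent[16] + BoardReserved[11] == old BoardReserved[15] */
	BUILD_BUG_ON(16 + 11 * 4 != 15 * 4);
}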
uint32_t min; /* MHz */ + uint32_t max; /* MHz */ + uint32_t count; + bool is_fine_grained; + struct smu_11_0_dpm_clk_level dpm_levels[MAX_DPM_LEVELS]; }; struct smu_11_0_pcie_table { @@ -107,7 +121,9 @@ struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table uclk_table; struct smu_11_0_dpm_table eclk_table; struct smu_11_0_dpm_table vclk_table; + struct smu_11_0_dpm_table vclk1_table; struct smu_11_0_dpm_table dclk_table; + struct smu_11_0_dpm_table dclk1_table; struct smu_11_0_dpm_table dcef_table; struct smu_11_0_dpm_table pixel_table; struct smu_11_0_dpm_table display_table; @@ -197,19 +213,13 @@ int smu_v11_0_get_current_power_limit(struct smu_context *smu, int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n); -int smu_v11_0_get_current_clk_freq(struct smu_context *smu, - enum smu_clk_type clk_id, - uint32_t *value); - int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu); int smu_v11_0_enable_thermal_alert(struct smu_context *smu); int smu_v11_0_disable_thermal_alert(struct smu_context *smu); -int smu_v11_0_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size); +int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value); int smu_v11_0_set_min_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk); @@ -252,13 +262,18 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) int smu_v11_0_baco_enter(struct smu_context *smu); int smu_v11_0_baco_exit(struct smu_context *smu); +int smu_v11_0_mode1_reset(struct smu_context *smu); + int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); -int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +int smu_v11_0_set_hard_freq_limited_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t min, + uint32_t max); int smu_v11_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level); @@ -266,4 +281,22 @@ int smu_v11_0_set_performance_level(struct smu_context *smu, int smu_v11_0_set_power_source(struct smu_context *smu, enum smu_power_src_type power_src); +int smu_v11_0_get_dpm_freq_by_index(struct smu_context *smu, + enum smu_clk_type clk_type, + uint16_t level, + uint32_t *value); + +int smu_v11_0_get_dpm_level_count(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *value); + +int smu_v11_0_set_single_dpm_table(struct smu_context *smu, + enum smu_clk_type clk_type, + struct smu_11_0_dpm_table *single_dpm_table); + +int smu_v11_0_get_dpm_level_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min_value, + uint32_t *max_value); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h index d29f75223987..fd83a723f32c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h @@ -60,10 +60,6 @@ int smu_v12_0_powergate_jpeg(struct smu_context *smu, bool gate); int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable); -int smu_v12_0_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size); - uint32_t smu_v12_0_get_gfxoff_status(struct smu_context *smu); int smu_v12_0_gfx_off_control(struct smu_context *smu, bool enable); @@ -77,13 +73,6 @@ int smu_v12_0_set_default_dpm_tables(struct smu_context *smu); int 
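/*
 * Example (sketch) of consuming the extended smu_11_0_dpm_table, which
 * now carries the discrete levels instead of only a min/max pair:
 */
static void smu_dump_dpm_table_example(struct smu_context *smu,
				       struct smu_11_0_dpm_table *table)
{
	uint32_t i;

	for (i = 0; i < table->count; i++)
		dev_dbg(smu->adev->dev, "level %u: %u MHz%s\n",
			i, table->dpm_levels[i].value,
			table->dpm_levels[i].enabled ? "" : " (disabled)");
}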
smu_v12_0_get_enabled_mask(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); -int smu_v12_0_get_current_clk_freq(struct smu_context *smu, - enum smu_clk_type clk_id, - uint32_t *value); - -int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max); - int smu_v12_0_mode2_reset(struct smu_context *smu); int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 42ade9df0d6a..ead135f39c7e 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -689,44 +689,171 @@ static int navi10_allocate_dpm_context(struct smu_context *smu) static int navi10_set_default_dpm_table(struct smu_context *smu) { - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; - PPTable_t *driver_ppt = NULL; - int i; - - driver_ppt = table_context->driver_pptable; - - dpm_context->dpm_tables.soc_table.min = driver_ppt->FreqTableSocclk[0]; - dpm_context->dpm_tables.soc_table.max = driver_ppt->FreqTableSocclk[NUM_SOCCLK_DPM_LEVELS - 1]; + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + PPTable_t *driver_ppt = smu->smu_table.driver_pptable; + struct smu_11_0_dpm_table *dpm_table; + int ret = 0; - dpm_context->dpm_tables.gfx_table.min = driver_ppt->FreqTableGfx[0]; - dpm_context->dpm_tables.gfx_table.max = driver_ppt->FreqTableGfx[NUM_GFXCLK_DPM_LEVELS - 1]; + /* socclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.soc_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_SOCCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_SOCCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.socclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.uclk_table.min = driver_ppt->FreqTableUclk[0]; - dpm_context->dpm_tables.uclk_table.max = driver_ppt->FreqTableUclk[NUM_UCLK_DPM_LEVELS - 1]; + /* gfxclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_GFXCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_GFXCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.vclk_table.min = driver_ppt->FreqTableVclk[0]; - dpm_context->dpm_tables.vclk_table.max = driver_ppt->FreqTableVclk[NUM_VCLK_DPM_LEVELS - 1]; + /* uclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_UCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_UCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + 
dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.uclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.dclk_table.min = driver_ppt->FreqTableDclk[0]; - dpm_context->dpm_tables.dclk_table.max = driver_ppt->FreqTableDclk[NUM_DCLK_DPM_LEVELS - 1]; + /* vclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_VCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_VCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.dcef_table.min = driver_ppt->FreqTableDcefclk[0]; - dpm_context->dpm_tables.dcef_table.max = driver_ppt->FreqTableDcefclk[NUM_DCEFCLK_DPM_LEVELS - 1]; + /* dclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_VCN_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.pixel_table.min = driver_ppt->FreqTablePixclk[0]; - dpm_context->dpm_tables.pixel_table.max = driver_ppt->FreqTablePixclk[NUM_PIXCLK_DPM_LEVELS - 1]; + /* dcefclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.dcef_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DCEFCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DCEFCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.display_table.min = driver_ppt->FreqTableDispclk[0]; - dpm_context->dpm_tables.display_table.max = driver_ppt->FreqTableDispclk[NUM_DISPCLK_DPM_LEVELS - 1]; + /* pixelclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.pixel_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_PIXCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_PIXCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; - dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + /* displayclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.display_table; + if 
(smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DISPCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DISPCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - for (i = 0; i < MAX_PCIE_CONF; i++) { - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + /* phyclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.phy_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_PHYCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_PHYCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } return 0; @@ -877,20 +1004,17 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_FCLK: case SMU_DCEFCLK: - ret = smu_get_current_clk_freq(smu, clk_type, &cur_value); + ret = navi10_get_current_clk_freq_by_table(smu, clk_type, &cur_value); if (ret) return size; - /* 10KHz -> MHz */ - cur_value = cur_value / 100; - - ret = smu_get_dpm_level_count(smu, clk_type, &count); + ret = smu_v11_0_get_dpm_level_count(smu, clk_type, &count); if (ret) return size; if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) { for (i = 0; i < count; i++) { - ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value); if (ret) return size; @@ -898,10 +1022,10 @@ static int navi10_print_clk_levels(struct smu_context *smu, cur_value == value ? "*" : ""); } } else { - ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]); if (ret) return size; - ret = smu_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]); if (ret) return size; @@ -978,7 +1102,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, default: break; } - size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + size += sprintf(buf + size, "%d: %uMHz %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); } break; case SMU_OD_RANGE: @@ -1061,15 +1185,15 @@ static int navi10_force_clk_levels(struct smu_context *smu, soft_min_level = (soft_min_level >= 1 ? 
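/*
 * The enable-or-fall-back-to-boot-value pattern above repeats once per
 * clock domain; a hypothetical helper capturing it (sketch, not in the
 * patch -- boot_mhz is the boot_values entry already converted from
 * 10 kHz units to MHz):
 */
static int smu_setup_single_dpm_table_example(struct smu_context *smu,
					      enum smu_clk_type clk_type,
					      enum smu_feature_mask feature,
					      bool snap_to_discrete,
					      uint32_t boot_mhz,
					      struct smu_11_0_dpm_table *dpm_table)
{
	int ret;

	if (smu_feature_is_enabled(smu, feature)) {
		ret = smu_v11_0_set_single_dpm_table(smu, clk_type, dpm_table);
		if (ret)
			return ret;
		dpm_table->is_fine_grained = !snap_to_discrete;
	} else {
		dpm_table->count = 1;
		dpm_table->dpm_levels[0].value = boot_mhz;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[0].value;
	}

	return 0;
}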
1 : 0); } - ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) return size; - ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); if (ret) return size; - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); + ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); if (ret) return size; break; @@ -1082,22 +1206,93 @@ static int navi10_force_clk_levels(struct smu_context *smu, static int navi10_populate_umd_state_clk(struct smu_context *smu) { - int ret = 0; - uint32_t min_sclk_freq = 0, min_mclk_freq = 0; - - ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL, false); - if (ret) - return ret; - - smu->pstate_sclk = min_sclk_freq * 100; - - ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL, false); - if (ret) - return ret; + struct smu_11_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_11_0_dpm_table *gfx_table = + &dpm_context->dpm_tables.gfx_table; + struct smu_11_0_dpm_table *mem_table = + &dpm_context->dpm_tables.uclk_table; + struct smu_11_0_dpm_table *soc_table = + &dpm_context->dpm_tables.soc_table; + struct smu_umd_pstate_table *pstate_table = + &smu->pstate_table; + struct amdgpu_device *adev = smu->adev; + uint32_t sclk_freq; - smu->pstate_mclk = min_mclk_freq * 100; + pstate_table->gfxclk_pstate.min = gfx_table->min; + switch (adev->asic_type) { + case CHIP_NAVI10: + switch (adev->pdev->revision) { + case 0xf0: /* XTX */ + case 0xc0: + sclk_freq = NAVI10_PEAK_SCLK_XTX; + break; + case 0xf1: /* XT */ + case 0xc1: + sclk_freq = NAVI10_PEAK_SCLK_XT; + break; + default: /* XL */ + sclk_freq = NAVI10_PEAK_SCLK_XL; + break; + } + break; + case CHIP_NAVI14: + switch (adev->pdev->revision) { + case 0xc7: /* XT */ + case 0xf4: + sclk_freq = NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK; + break; + case 0xc1: /* XTM */ + case 0xf2: + sclk_freq = NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK; + break; + case 0xc3: /* XLM */ + case 0xf3: + sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; + break; + case 0xc5: /* XTX */ + case 0xf6: + sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; + break; + default: /* XL */ + sclk_freq = NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK; + break; + } + break; + case CHIP_NAVI12: + sclk_freq = NAVI12_UMD_PSTATE_PEAK_GFXCLK; + break; + default: + sclk_freq = gfx_table->dpm_levels[gfx_table->count - 1].value; + break; + } + pstate_table->gfxclk_pstate.peak = sclk_freq; + + pstate_table->uclk_pstate.min = mem_table->min; + pstate_table->uclk_pstate.peak = mem_table->max; + + pstate_table->socclk_pstate.min = soc_table->min; + pstate_table->socclk_pstate.peak = soc_table->max; + + if (gfx_table->max > NAVI10_UMD_PSTATE_PROFILING_GFXCLK && + mem_table->max > NAVI10_UMD_PSTATE_PROFILING_MEMCLK && + soc_table->max > NAVI10_UMD_PSTATE_PROFILING_SOCCLK) { + pstate_table->gfxclk_pstate.standard = + NAVI10_UMD_PSTATE_PROFILING_GFXCLK; + pstate_table->uclk_pstate.standard = + NAVI10_UMD_PSTATE_PROFILING_MEMCLK; + pstate_table->socclk_pstate.standard = + NAVI10_UMD_PSTATE_PROFILING_SOCCLK; + } else { + pstate_table->gfxclk_pstate.standard = + pstate_table->gfxclk_pstate.min; + pstate_table->uclk_pstate.standard = + pstate_table->uclk_pstate.min; + pstate_table->socclk_pstate.standard = + pstate_table->socclk_pstate.min; + } - return ret; + return 0; } static int 
navi10_get_clock_by_type_with_latency(struct smu_context *smu, @@ -1113,7 +1308,7 @@ static int navi10_get_clock_by_type_with_latency(struct smu_context *smu, case SMU_SOCCLK: case SMU_MCLK: case SMU_UCLK: - ret = smu_get_dpm_level_count(smu, clk_type, &level_count); + ret = smu_v11_0_get_dpm_level_count(smu, clk_type, &level_count); if (ret) return ret; @@ -1121,7 +1316,7 @@ static int navi10_get_clock_by_type_with_latency(struct smu_context *smu, clocks->num_levels = level_count; for (i = 0; i < level_count; i++) { - ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &freq); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &freq); if (ret) return ret; @@ -1146,10 +1341,10 @@ static int navi10_pre_display_config_changed(struct smu_context *smu) return ret; if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq, false); + ret = smu_v11_0_get_dpm_ultimate_freq(smu, SMU_UCLK, NULL, &max_freq); if (ret) return ret; - ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, max_freq); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, 0, max_freq); if (ret) return ret; } @@ -1174,59 +1369,6 @@ static int navi10_display_config_changed(struct smu_context *smu) return ret; } -static int navi10_force_dpm_limit_value(struct smu_context *smu, bool highest) -{ - int ret = 0, i = 0; - uint32_t min_freq, max_freq, force_freq; - enum smu_clk_type clk_type; - - enum smu_clk_type clks[] = { - SMU_GFXCLK, - SMU_MCLK, - SMU_SOCCLK, - }; - - for (i = 0; i < ARRAY_SIZE(clks); i++) { - clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - force_freq = highest ? max_freq : min_freq; - ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq, false); - if (ret) - return ret; - } - - return ret; -} - -static int navi10_unforce_dpm_levels(struct smu_context *smu) -{ - int ret = 0, i = 0; - uint32_t min_freq, max_freq; - enum smu_clk_type clk_type; - - enum smu_clk_type clks[] = { - SMU_GFXCLK, - SMU_MCLK, - SMU_SOCCLK, - }; - - for (i = 0; i < ARRAY_SIZE(clks); i++) { - clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); - if (ret) - return ret; - } - - return ret; -} - static int navi10_get_gpu_power(struct smu_context *smu, uint32_t *value) { if (!value) @@ -1479,47 +1621,6 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u return ret; } -static int navi10_get_profiling_clk_mask(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask) -{ - int ret = 0; - uint32_t level_count = 0; - - if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { - if (sclk_mask) - *sclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { - if (mclk_mask) - *mclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { - if(sclk_mask) { - ret = smu_get_dpm_level_count(smu, SMU_SCLK, &level_count); - if (ret) - return ret; - *sclk_mask = level_count - 1; - } - - if(mclk_mask) { - ret = smu_get_dpm_level_count(smu, SMU_MCLK, &level_count); - if (ret) - return ret; - *mclk_mask = level_count - 1; - } - - if(soc_mask) { - ret = smu_get_dpm_level_count(smu, SMU_SOCCLK, &level_count); - if (ret) - return ret; - *soc_mask = level_count - 1; - } - } - - return ret; -} - static int 
navi10_notify_smc_display_config(struct smu_context *smu) { struct smu_clocks min_clocks = {0}; @@ -1552,7 +1653,7 @@ static int navi10_notify_smc_display_config(struct smu_context *smu) } if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = smu_set_hard_freq_range(smu, SMU_UCLK, min_clocks.memory_clock/100, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, min_clocks.memory_clock/100, 0); if (ret) { dev_err(smu->adev->dev, "[%s] Set hard min uclk failed!", __func__); return ret; @@ -1700,8 +1801,23 @@ static int navi10_read_sensor(struct smu_context *smu, ret = navi10_thermal_get_temperature(smu, sensor, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = navi10_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = navi10_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data); + *size = 4; + break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = -EOPNOTSUPP; + break; } mutex_unlock(&smu->sensor_lock); @@ -1737,160 +1853,32 @@ static int navi10_get_uclk_dpm_states(struct smu_context *smu, uint32_t *clocks_ return 0; } -static int navi10_set_performance_level(struct smu_context *smu, - enum amd_dpm_forced_level level); - -static int navi10_set_standard_performance_level(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - uint32_t sclk_freq = 0, uclk_freq = 0; - - switch (adev->asic_type) { - case CHIP_NAVI10: - sclk_freq = NAVI10_UMD_PSTATE_PROFILING_GFXCLK; - uclk_freq = NAVI10_UMD_PSTATE_PROFILING_MEMCLK; - break; - case CHIP_NAVI14: - sclk_freq = NAVI14_UMD_PSTATE_PROFILING_GFXCLK; - uclk_freq = NAVI14_UMD_PSTATE_PROFILING_MEMCLK; - break; - default: - /* by default, this is same as auto performance level */ - return navi10_set_performance_level(smu, AMD_DPM_FORCED_LEVEL_AUTO); - } - - ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false); - if (ret) - return ret; - ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false); - if (ret) - return ret; - - return ret; -} - -static int navi10_set_peak_performance_level(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - uint32_t sclk_freq = 0, uclk_freq = 0; - - switch (adev->asic_type) { - case CHIP_NAVI10: - switch (adev->pdev->revision) { - case 0xf0: /* XTX */ - case 0xc0: - sclk_freq = NAVI10_PEAK_SCLK_XTX; - break; - case 0xf1: /* XT */ - case 0xc1: - sclk_freq = NAVI10_PEAK_SCLK_XT; - break; - default: /* XL */ - sclk_freq = NAVI10_PEAK_SCLK_XL; - break; - } - break; - case CHIP_NAVI14: - switch (adev->pdev->revision) { - case 0xc7: /* XT */ - case 0xf4: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XT_GFXCLK; - break; - case 0xc1: /* XTM */ - case 0xf2: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XTM_GFXCLK; - break; - case 0xc3: /* XLM */ - case 0xf3: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; - break; - case 0xc5: /* XTX */ - case 0xf6: - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XLM_GFXCLK; - break; - default: /* XL */ - sclk_freq = NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK; - break; - } - break; - case CHIP_NAVI12: - sclk_freq = NAVI12_UMD_PSTATE_PEAK_GFXCLK; - break; - default: - ret = smu_get_dpm_level_range(smu, SMU_SCLK, NULL, &sclk_freq); - if (ret) - return ret; - } - - ret = smu_get_dpm_level_range(smu, SMU_UCLK, NULL, 
&uclk_freq); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false); - if (ret) - return ret; - ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false); - if (ret) - return ret; - - return ret; -} - -static int navi10_set_performance_level(struct smu_context *smu, - enum amd_dpm_forced_level level) -{ - int ret = 0; - uint32_t sclk_mask, mclk_mask, soc_mask; - - switch (level) { - case AMD_DPM_FORCED_LEVEL_HIGH: - ret = smu_force_dpm_limit_value(smu, true); - break; - case AMD_DPM_FORCED_LEVEL_LOW: - ret = smu_force_dpm_limit_value(smu, false); - break; - case AMD_DPM_FORCED_LEVEL_AUTO: - ret = smu_unforce_dpm_levels(smu); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: - ret = navi10_set_standard_performance_level(smu); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: - case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: - ret = smu_get_profiling_clk_mask(smu, level, - &sclk_mask, - &mclk_mask, - &soc_mask); - if (ret) - return ret; - smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); - smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); - smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: - ret = navi10_set_peak_performance_level(smu); - break; - case AMD_DPM_FORCED_LEVEL_MANUAL: - case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: - default: - break; - } - return ret; -} - static int navi10_get_thermal_temperature_range(struct smu_context *smu, struct smu_temperature_range *range) { struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; + struct smu_11_0_powerplay_table *powerplay_table = + table_context->power_play_table; + PPTable_t *pptable = smu->smu_table.driver_pptable; - if (!range || !powerplay_table) + if (!range) return -EINVAL; - range->max = powerplay_table->software_shutdown_temp * + memcpy(range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range)); + + range->max = pptable->TedgeLimit * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_crit_max = pptable->ThotspotLimit * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_crit_max = pptable->TmemLimit * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_emergency_max = (pptable->TmemLimit + CTF_OFFSET_MEM)* + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->software_shutdown_temp = powerplay_table->software_shutdown_temp; return 0; } @@ -1909,9 +1897,9 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, return 0; if(disable_memory_clock_switch) - ret = smu_set_hard_freq_range(smu, SMU_UCLK, max_memory_clock, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, max_memory_clock, 0); else - ret = smu_set_hard_freq_range(smu, SMU_UCLK, min_memory_clock, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, min_memory_clock, 0); if(!ret) smu->disable_uclk_switch = disable_memory_clock_switch; @@ -1956,12 +1944,16 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; PPTable_t *pptable = smu->smu_table.driver_pptable; - int ret, i; uint32_t smu_pcie_arg; + int ret, 
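/*
 * Worked example for navi10_get_thermal_temperature_range above: the
 * limits are reported in millidegrees C
 * (SMU_TEMPERATURE_UNITS_PER_CENTIGRADES == 1000) and each emergency
 * trip sits CTF_OFFSET_* degrees above its critical limit. Assuming
 * TedgeLimit == 100 (degrees C, value taken only as an example):
 *
 *   range->max                = 100 * 1000       = 100000
 *   range->edge_emergency_max = (100 + 5) * 1000 = 105000
 */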
i; - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + /* lclk dpm table setup */ + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pptable->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pptable->PcieLaneCount[i]; + } for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | @@ -2039,7 +2031,7 @@ static bool navi10_is_baco_supported(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t val; - if (!smu_v11_0_baco_is_support(smu)) + if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu))) return false; val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); @@ -2319,25 +2311,25 @@ static int navi10_disable_umc_cdr_12gbps_workaround(struct smu_context *smu) if (smu_version < 0x2A3200) return 0; - ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_count); + ret = smu_v11_0_get_dpm_level_count(smu, SMU_UCLK, &uclk_count); if (ret) return ret; - ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)0, &uclk_min); + ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)0, &uclk_min); if (ret) return ret; - ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)(uclk_count - 1), &uclk_max); + ret = smu_v11_0_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)(uclk_count - 1), &uclk_max); if (ret) return ret; /* Force UCLK out of the highest DPM */ - ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_min); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, 0, uclk_min); if (ret) return ret; /* Revert the UCLK Hardmax */ - ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_max); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, 0, uclk_max); if (ret) return ret; @@ -2348,37 +2340,6 @@ static int navi10_disable_umc_cdr_12gbps_workaround(struct smu_context *smu) return navi10_dummy_pstate_control(smu, true); } -static int navi10_set_thermal_range(struct smu_context *smu, - struct smu_temperature_range range) -{ - struct amdgpu_device *adev = smu->adev; - int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; - int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; - uint32_t val; - struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_powerplay_table *powerplay_table = table_context->power_play_table; - - low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, - range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); - - if (low > high) - return -EINVAL; - - val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); - val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); - - WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); - - return 0; -} - static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, .alloc_dpm_context = navi10_allocate_dpm_context, @@ -2392,7 +2353,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .set_default_dpm_table = navi10_set_default_dpm_table, .dpm_set_vcn_enable = navi10_dpm_set_vcn_enable, .dpm_set_jpeg_enable = 
navi10_dpm_set_jpeg_enable, - .get_current_clk_freq_by_table = navi10_get_current_clk_freq_by_table, .print_clk_levels = navi10_print_clk_levels, .force_clk_levels = navi10_force_clk_levels, .populate_umd_state_clk = navi10_populate_umd_state_clk, @@ -2400,18 +2360,15 @@ static const struct pptable_funcs navi10_ppt_funcs = { .pre_display_config_changed = navi10_pre_display_config_changed, .display_config_changed = navi10_display_config_changed, .notify_smc_display_config = navi10_notify_smc_display_config, - .force_dpm_limit_value = navi10_force_dpm_limit_value, - .unforce_dpm_levels = navi10_unforce_dpm_levels, .is_dpm_running = navi10_is_dpm_running, .get_fan_speed_percent = navi10_get_fan_speed_percent, .get_fan_speed_rpm = navi10_get_fan_speed_rpm, .get_power_profile_mode = navi10_get_power_profile_mode, .set_power_profile_mode = navi10_set_power_profile_mode, - .get_profiling_clk_mask = navi10_get_profiling_clk_mask, .set_watermarks_table = navi10_set_watermarks_table, .read_sensor = navi10_read_sensor, .get_uclk_dpm_states = navi10_get_uclk_dpm_states, - .set_performance_level = navi10_set_performance_level, + .set_performance_level = smu_v11_0_set_performance_level, .get_thermal_temperature_range = navi10_get_thermal_temperature_range, .display_disable_memory_clock_switch = navi10_display_disable_memory_clock_switch, .get_power_limit = navi10_get_power_limit, @@ -2438,7 +2395,6 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_enabled_mask = smu_v11_0_get_enabled_mask, .notify_display_change = smu_v11_0_notify_display_change, .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, .enable_thermal_alert = smu_v11_0_enable_thermal_alert, .disable_thermal_alert = smu_v11_0_disable_thermal_alert, @@ -2460,13 +2416,11 @@ static const struct pptable_funcs navi10_ppt_funcs = { .baco_exit = smu_v11_0_baco_exit, .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, - .override_pcie_parameters = smu_v11_0_override_pcie_parameters, .set_default_od_settings = navi10_set_default_od_settings, .od_edit_dpm_table = navi10_od_edit_dpm_table, .run_btc = navi10_run_btc, .disable_umc_cdr_12gbps_workaround = navi10_disable_umc_cdr_12gbps_workaround, .set_power_source = smu_v11_0_set_power_source, - .set_thermal_range = navi10_set_thermal_range, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index f286c1e1934f..79cadc2df0d5 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -236,23 +236,173 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; - GET_DPM_CUR_FREQ(clk_table, clk_type, dpm_level, *freq); + switch (clk_type) { + case SMU_SOCCLK: + if (dpm_level >= NUM_SOCCLK_DPM_LEVELS) + return -EINVAL; + *freq = clk_table->SocClocks[dpm_level].Freq; + break; + case SMU_MCLK: + if (dpm_level >= NUM_FCLK_DPM_LEVELS) + return -EINVAL; + *freq = clk_table->FClocks[dpm_level].Freq; + break; + case SMU_DCEFCLK: + if (dpm_level >= NUM_DCFCLK_DPM_LEVELS) + return -EINVAL; + *freq = clk_table->DcfClocks[dpm_level].Freq; + break; + case SMU_FCLK: + if (dpm_level >= NUM_FCLK_DPM_LEVELS) + return -EINVAL; + *freq = clk_table->FClocks[dpm_level].Freq; + break; + 
default: + return -EINVAL; + } + + return 0; +} + +static int renoir_get_profiling_clk_mask(struct smu_context *smu, + enum amd_dpm_forced_level level, + uint32_t *sclk_mask, + uint32_t *mclk_mask, + uint32_t *soc_mask) +{ + + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { + if (sclk_mask) + *sclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { + if (mclk_mask) + *mclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + if(sclk_mask) + /* The sclk as gfxclk and has three level about max/min/current */ + *sclk_mask = 3 - 1; + + if(mclk_mask) + *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; + + if(soc_mask) + *soc_mask = NUM_SOCCLK_DPM_LEVELS - 1; + } return 0; } +static int renoir_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + int ret = 0; + uint32_t mclk_mask, soc_mask; + uint32_t clock_limit; + + if (!smu_clk_dpm_is_enabled(smu, clk_type)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + clock_limit = smu->smu_table.boot_values.uclk; + break; + case SMU_GFXCLK: + case SMU_SCLK: + clock_limit = smu->smu_table.boot_values.gfxclk; + break; + case SMU_SOCCLK: + clock_limit = smu->smu_table.boot_values.socclk; + break; + default: + clock_limit = 0; + break; + } + + /* clock in Mhz unit */ + if (min) + *min = clock_limit / 100; + if (max) + *max = clock_limit / 100; + + return 0; + } + + if (max) { + ret = renoir_get_profiling_clk_mask(smu, + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK, + NULL, + &mclk_mask, + &soc_mask); + if (ret) + goto failed; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + ret = smu_send_smc_msg(smu, SMU_MSG_GetMaxGfxclkFrequency, max); + if (ret) { + dev_err(smu->adev->dev, "Attempt to get max GX frequency from SMC Failed !\n"); + goto failed; + } + break; + case SMU_UCLK: + case SMU_FCLK: + case SMU_MCLK: + ret = renoir_get_dpm_clk_limited(smu, clk_type, mclk_mask, max); + if (ret) + goto failed; + break; + case SMU_SOCCLK: + ret = renoir_get_dpm_clk_limited(smu, clk_type, soc_mask, max); + if (ret) + goto failed; + break; + default: + ret = -EINVAL; + goto failed; + } + } + + if (min) { + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + ret = smu_send_smc_msg(smu, SMU_MSG_GetMinGfxclkFrequency, min); + if (ret) { + dev_err(smu->adev->dev, "Attempt to get min GX frequency from SMC Failed !\n"); + goto failed; + } + break; + case SMU_UCLK: + case SMU_FCLK: + case SMU_MCLK: + ret = renoir_get_dpm_clk_limited(smu, clk_type, 0, min); + if (ret) + goto failed; + break; + case SMU_SOCCLK: + ret = renoir_get_dpm_clk_limited(smu, clk_type, 0, min); + if (ret) + goto failed; + break; + default: + ret = -EINVAL; + goto failed; + } + } +failed: + return ret; +} + static int renoir_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; - DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; bool cur_value_match_level = false; - if (!clk_table || clk_type >= SMU_CLK_COUNT) - return -EINVAL; - memset(&metrics, 0, sizeof(metrics)); ret = renoir_get_metrics_table(smu, &metrics); @@ -264,7 +414,7 @@ static int renoir_print_clk_levels(struct smu_context *smu, case SMU_SCLK: /* retirve table returned paramters unit is MHz */ cur_value = metrics.ClockFrequency[CLOCK_GFXCLK]; - ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min, &max, false); + ret = renoir_get_dpm_ultimate_freq(smu, SMU_GFXCLK, &min, &max); if (!ret) { 
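/*
 * Note on the DPM-disabled fallback in renoir_get_dpm_ultimate_freq
 * above: boot_values.* are stored in 10 kHz units, so the "/ 100"
 * yields MHz -- e.g. a (hypothetical) boot gfxclk of 130000 reports as
 * 1300 MHz for both min and max.
 */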
/* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ if (cur_value == max) @@ -304,7 +454,9 @@ static int renoir_print_clk_levels(struct smu_context *smu, } for (i = 0; i < count; i++) { - GET_DPM_CUR_FREQ(clk_table, clk_type, i, value); + ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value); + if (ret) + return ret; if (!value) continue; size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, @@ -434,12 +586,12 @@ static int renoir_force_dpm_limit_value(struct smu_context *smu, bool highest) for (i = 0; i < ARRAY_SIZE(clks); i++) { clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); + ret = renoir_get_dpm_ultimate_freq(smu, clk_type, &min_freq, &max_freq); if (ret) return ret; force_freq = highest ? max_freq : min_freq; - ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq, false); + ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq); if (ret) return ret; } @@ -468,11 +620,11 @@ static int renoir_unforce_dpm_levels(struct smu_context *smu) { clk_type = clk_feature_map[i].clk_type; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); + ret = renoir_get_dpm_ultimate_freq(smu, clk_type, &min_freq, &max_freq); if (ret) return ret; - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); + ret = smu_v12_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); if (ret) return ret; } @@ -552,33 +704,6 @@ static int renoir_get_workload_type(struct smu_context *smu, uint32_t profile) return pplib_workload; } -static int renoir_get_profiling_clk_mask(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask) -{ - - if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { - if (sclk_mask) - *sclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { - if (mclk_mask) - *mclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { - if(sclk_mask) - /* The sclk as gfxclk and has three level about max/min/current */ - *sclk_mask = 3 - 1; - - if(mclk_mask) - *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; - - if(soc_mask) - *soc_mask = NUM_SOCCLK_DPM_LEVELS - 1; - } - - return 0; -} /** * This interface get dpm clock table for dc @@ -620,7 +745,6 @@ static int renoir_force_clk_levels(struct smu_context *smu, int ret = 0 ; uint32_t soft_min_level = 0, soft_max_level = 0, min_freq = 0, max_freq = 0; - DpmClocks_t *clk_table = smu->smu_table.clocks_table; soft_min_level = mask ? (ffs(mask) - 1) : 0; soft_max_level = mask ? 
(fls(mask) - 1) : 0; @@ -633,7 +757,7 @@ static int renoir_force_clk_levels(struct smu_context *smu, return -EINVAL; } - ret = smu_get_dpm_freq_range(smu, SMU_GFXCLK, &min_freq, &max_freq, false); + ret = renoir_get_dpm_ultimate_freq(smu, SMU_GFXCLK, &min_freq, &max_freq); if (ret) return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, @@ -650,8 +774,12 @@ static int renoir_force_clk_levels(struct smu_context *smu, return ret; break; case SMU_SOCCLK: - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); + ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_min_level, &min_freq); + if (ret) + return ret; + ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, &max_freq); + if (ret) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxSocclkByFreq, max_freq, NULL); if (ret) return ret; @@ -661,8 +789,12 @@ static int renoir_force_clk_levels(struct smu_context *smu, break; case SMU_MCLK: case SMU_FCLK: - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_min_level, min_freq); - GET_DPM_CUR_FREQ(clk_table, clk_type, soft_max_level, max_freq); + ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_min_level, &min_freq); + if (ret) + return ret; + ret = renoir_get_dpm_clk_limited(smu, clk_type, soft_max_level, &max_freq); + if (ret) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxFclkByFreq, max_freq, NULL); if (ret) return ret; @@ -683,22 +815,22 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u uint32_t profile_mode = input[size]; if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { - dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode); + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); return -EINVAL; } /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_workload_get_type(smu, smu->power_profile_mode); + workload_type = smu_workload_get_type(smu, profile_mode); if (workload_type < 0) { /* * TODO: If some case need switch to powersave/default power mode * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving. 
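[Editor's note] renoir_force_clk_levels() above turns the user-supplied level bitmask into a soft min/max pair with ffs()/fls(): the lowest set bit picks the minimum DPM level, the highest set bit the maximum. A standalone sketch of that conversion, substituting GCC builtins for the kernel's ffs()/fls():

#include <stdio.h>

/* Lowest set bit index (1-based), 0 if no bit set - mirrors kernel ffs(). */
static int my_ffs(unsigned x) { return x ? __builtin_ffs(x) : 0; }
/* Highest set bit position (1-based), 0 if no bit set - mirrors kernel fls(). */
static int my_fls(unsigned x) { return x ? 32 - __builtin_clz(x) : 0; }

int main(void)
{
	unsigned mask = 0x6;	/* user asked for DPM levels 1 and 2 */
	unsigned soft_min = mask ? (my_ffs(mask) - 1) : 0;
	unsigned soft_max = mask ? (my_fls(mask) - 1) : 0;

	printf("mask 0x%x -> soft min level %u, soft max level %u\n",
	       mask, soft_min, soft_max);	/* prints levels 1 and 2 */
	return 0;
}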
*/ - dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n",smu->power_profile_mode); + dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode); return -EINVAL; } - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, 1 << workload_type, NULL); if (ret) { @@ -716,19 +848,19 @@ static int renoir_set_peak_clock_by_device(struct smu_context *smu) int ret = 0; uint32_t sclk_freq = 0, uclk_freq = 0; - ret = smu_get_dpm_freq_range(smu, SMU_SCLK, NULL, &sclk_freq, false); + ret = renoir_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_freq); if (ret) return ret; - ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false); + ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_freq, sclk_freq); if (ret) return ret; - ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &uclk_freq, false); + ret = renoir_get_dpm_ultimate_freq(smu, SMU_UCLK, NULL, &uclk_freq); if (ret) return ret; - ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false); + ret = smu_v12_0_set_soft_freq_limited_range(smu, SMU_UCLK, uclk_freq, uclk_freq); if (ret) return ret; @@ -743,26 +875,26 @@ static int renoir_set_performance_level(struct smu_context *smu, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: - ret = smu_force_dpm_limit_value(smu, true); + ret = renoir_force_dpm_limit_value(smu, true); break; case AMD_DPM_FORCED_LEVEL_LOW: - ret = smu_force_dpm_limit_value(smu, false); + ret = renoir_force_dpm_limit_value(smu, false); break; case AMD_DPM_FORCED_LEVEL_AUTO: case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: - ret = smu_unforce_dpm_levels(smu); + ret = renoir_unforce_dpm_levels(smu); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: - ret = smu_get_profiling_clk_mask(smu, level, - &sclk_mask, - &mclk_mask, - &soc_mask); + ret = renoir_get_profiling_clk_mask(smu, level, + &sclk_mask, + &mclk_mask, + &soc_mask); if (ret) return ret; - smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); - smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); - smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); + renoir_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask); + renoir_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); + renoir_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: ret = renoir_set_peak_clock_by_device(smu); @@ -896,8 +1028,19 @@ static int renoir_read_sensor(struct smu_context *smu, ret = renoir_get_gpu_temperature(smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = renoir_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = renoir_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; default: - ret = smu_v12_0_read_sensor(smu, sensor, data, size); + ret = -EOPNOTSUPP; + break; } mutex_unlock(&smu->sensor_lock); @@ -926,16 +1069,11 @@ static const struct pptable_funcs renoir_ppt_funcs = { .get_smu_table_index = renoir_get_smu_table_index, .tables_init = renoir_tables_init, .set_power_state = NULL, - .get_dpm_clk_limited = renoir_get_dpm_clk_limited, .print_clk_levels = renoir_print_clk_levels, .get_current_power_state = renoir_get_current_power_state, .dpm_set_vcn_enable = renoir_dpm_set_vcn_enable, .dpm_set_jpeg_enable = 
renoir_dpm_set_jpeg_enable, - .get_current_clk_freq_by_table = renoir_get_current_clk_freq_by_table, - .force_dpm_limit_value = renoir_force_dpm_limit_value, - .unforce_dpm_levels = renoir_unforce_dpm_levels, .get_workload_type = renoir_get_workload_type, - .get_profiling_clk_mask = renoir_get_profiling_clk_mask, .force_clk_levels = renoir_force_clk_levels, .set_power_profile_mode = renoir_set_power_profile_mode, .set_performance_level = renoir_set_performance_level, @@ -953,8 +1091,7 @@ static const struct pptable_funcs renoir_ppt_funcs = { .fini_smc_tables = smu_v12_0_fini_smc_tables, .set_default_dpm_table = smu_v12_0_set_default_dpm_tables, .get_enabled_mask = smu_v12_0_get_enabled_mask, - .get_current_clk_freq = smu_v12_0_get_current_clk_freq, - .get_dpm_ultimate_freq = smu_v12_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = renoir_get_dpm_ultimate_freq, .mode2_reset = smu_v12_0_mode2_reset, .set_soft_freq_limited_range = smu_v12_0_set_soft_freq_limited_range, .set_driver_table_location = smu_v12_0_set_driver_table_location, diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h index 89cd6da118a3..8c3f004cdf8d 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.h @@ -30,24 +30,4 @@ extern void renoir_set_ppt_funcs(struct smu_context *smu); #define RENOIR_UMD_PSTATE_SOCCLK 678 #define RENOIR_UMD_PSTATE_FCLK 800 -#define GET_DPM_CUR_FREQ(table, clk_type, dpm_level, freq) \ - do { \ - switch (clk_type) { \ - case SMU_SOCCLK: \ - freq = table->SocClocks[dpm_level].Freq; \ - break; \ - case SMU_MCLK: \ - freq = table->FClocks[dpm_level].Freq; \ - break; \ - case SMU_DCEFCLK: \ - freq = table->DcfClocks[dpm_level].Freq; \ - break; \ - case SMU_FCLK: \ - freq = table->FClocks[dpm_level].Freq; \ - break; \ - default: \ - break; \ - } \ - } while (0) - #endif diff --git a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c index 1378dabb6463..5faef41b63a3 100644 --- a/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/sienna_cichlid_ppt.c @@ -39,8 +39,8 @@ #include "nbio/nbio_2_3_sh_mask.h" #include "thm/thm_11_0_2_offset.h" #include "thm/thm_11_0_2_sh_mask.h" - -#include "asic_reg/mp/mp_11_0_sh_mask.h" +#include "mp/mp_11_0_offset.h" +#include "mp/mp_11_0_sh_mask.h" /* * DO NOT use these for err/warn/info/debug messages. 
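[Editor's note] The renoir_ppt.h hunk above deletes the GET_DPM_CUR_FREQ() macro in favour of the renoir_get_dpm_clk_limited() calls seen earlier. The practical win is error propagation: the macro's default case silently left the output variable untouched, while a function can return -EINVAL that callers must check. A toy sketch of the function-based shape (clock names and table values are invented for illustration; the real helper reads the firmware clock table):

#include <stdint.h>
#include <stdio.h>
#include <errno.h>

enum clk_type { CLK_SOCCLK, CLK_FCLK, CLK_UNKNOWN };

static const uint32_t soc_clocks[] = { 400, 600, 678 };
static const uint32_t f_clocks[]   = { 400, 800, 1200 };

/* Invalid inputs yield -EINVAL instead of leaving *freq stale. */
static int get_dpm_clk_limited(enum clk_type type, uint32_t level, uint32_t *freq)
{
	switch (type) {
	case CLK_SOCCLK:
		if (level >= sizeof(soc_clocks) / sizeof(soc_clocks[0]))
			return -EINVAL;
		*freq = soc_clocks[level];
		return 0;
	case CLK_FCLK:
		if (level >= sizeof(f_clocks) / sizeof(f_clocks[0]))
			return -EINVAL;
		*freq = f_clocks[level];
		return 0;
	default:
		return -EINVAL;
	}
}

int main(void)
{
	uint32_t freq = 0;

	if (!get_dpm_clk_limited(CLK_SOCCLK, 2, &freq))
		printf("socclk level 2: %u MHz\n", (unsigned)freq);
	if (get_dpm_clk_limited(CLK_UNKNOWN, 0, &freq))
		printf("unknown clock correctly rejected\n");
	return 0;
}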
@@ -116,6 +116,7 @@ static struct smu_11_0_cmn2aisc_mapping sienna_cichlid_message_map[SMU_MSG_MAX_C MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg), MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3), + MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset), }; static struct smu_11_0_cmn2aisc_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = { @@ -394,7 +395,6 @@ static int sienna_cichlid_append_powerplay_table(struct smu_context *smu) PPTable_t *smc_pptable = table_context->driver_pptable; struct atom_smc_dpm_info_v4_9 *smc_dpm_table; int index, ret; - int i; index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, smc_dpm_info); @@ -405,92 +405,8 @@ static int sienna_cichlid_append_powerplay_table(struct smu_context *smu) return ret; memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers, - sizeof(I2cControllerConfig_t) * NUM_I2C_CONTROLLERS); - - /* SVI2 Board Parameters */ - smc_pptable->VddGfxVrMapping = smc_dpm_table->VddGfxVrMapping; - smc_pptable->VddSocVrMapping = smc_dpm_table->VddSocVrMapping; - smc_pptable->VddMem0VrMapping = smc_dpm_table->VddMem0VrMapping; - smc_pptable->VddMem1VrMapping = smc_dpm_table->VddMem1VrMapping; - smc_pptable->GfxUlvPhaseSheddingMask = smc_dpm_table->GfxUlvPhaseSheddingMask; - smc_pptable->SocUlvPhaseSheddingMask = smc_dpm_table->SocUlvPhaseSheddingMask; - smc_pptable->VddciUlvPhaseSheddingMask = smc_dpm_table->VddciUlvPhaseSheddingMask; - smc_pptable->MvddUlvPhaseSheddingMask = smc_dpm_table->MvddUlvPhaseSheddingMask; - - /* Telemetry Settings */ - smc_pptable->GfxMaxCurrent = smc_dpm_table->GfxMaxCurrent; - smc_pptable->GfxOffset = smc_dpm_table->GfxOffset; - smc_pptable->Padding_TelemetryGfx = smc_dpm_table->Padding_TelemetryGfx; - smc_pptable->SocMaxCurrent = smc_dpm_table->SocMaxCurrent; - smc_pptable->SocOffset = smc_dpm_table->SocOffset; - smc_pptable->Padding_TelemetrySoc = smc_dpm_table->Padding_TelemetrySoc; - smc_pptable->Mem0MaxCurrent = smc_dpm_table->Mem0MaxCurrent; - smc_pptable->Mem0Offset = smc_dpm_table->Mem0Offset; - smc_pptable->Padding_TelemetryMem0 = smc_dpm_table->Padding_TelemetryMem0; - smc_pptable->Mem1MaxCurrent = smc_dpm_table->Mem1MaxCurrent; - smc_pptable->Mem1Offset = smc_dpm_table->Mem1Offset; - smc_pptable->Padding_TelemetryMem1 = smc_dpm_table->Padding_TelemetryMem1; - smc_pptable->MvddRatio = smc_dpm_table->MvddRatio; - - /* GPIO Settings */ - smc_pptable->AcDcGpio = smc_dpm_table->AcDcGpio; - smc_pptable->AcDcPolarity = smc_dpm_table->AcDcPolarity; - smc_pptable->VR0HotGpio = smc_dpm_table->VR0HotGpio; - smc_pptable->VR0HotPolarity = smc_dpm_table->VR0HotPolarity; - smc_pptable->VR1HotGpio = smc_dpm_table->VR1HotGpio; - smc_pptable->VR1HotPolarity = smc_dpm_table->VR1HotPolarity; - smc_pptable->GthrGpio = smc_dpm_table->GthrGpio; - smc_pptable->GthrPolarity = smc_dpm_table->GthrPolarity; - - /* LED Display Settings */ - smc_pptable->LedPin0 = smc_dpm_table->LedPin0; - smc_pptable->LedPin1 = smc_dpm_table->LedPin1; - smc_pptable->LedPin2 = smc_dpm_table->LedPin2; - smc_pptable->LedEnableMask = smc_dpm_table->LedEnableMask; - smc_pptable->LedPcie = smc_dpm_table->LedPcie; - smc_pptable->LedError = smc_dpm_table->LedError; - smc_pptable->LedSpare1[0] = smc_dpm_table->LedSpare1[0]; - smc_pptable->LedSpare1[1] = smc_dpm_table->LedSpare1[1]; - - /* GFXCLK PLL Spread Spectrum */ - smc_pptable->PllGfxclkSpreadEnabled = smc_dpm_table->PllGfxclkSpreadEnabled; - smc_pptable->PllGfxclkSpreadPercent = smc_dpm_table->PllGfxclkSpreadPercent; - smc_pptable->PllGfxclkSpreadFreq = 
smc_dpm_table->PllGfxclkSpreadFreq; - - /* GFXCLK DFLL Spread Spectrum */ - smc_pptable->DfllGfxclkSpreadEnabled = smc_dpm_table->DfllGfxclkSpreadEnabled; - smc_pptable->DfllGfxclkSpreadPercent = smc_dpm_table->DfllGfxclkSpreadPercent; - smc_pptable->DfllGfxclkSpreadFreq = smc_dpm_table->DfllGfxclkSpreadFreq; - - /* UCLK Spread Spectrum */ - smc_pptable->UclkSpreadEnabled = smc_dpm_table->UclkSpreadEnabled; - smc_pptable->UclkSpreadPercent = smc_dpm_table->UclkSpreadPercent; - smc_pptable->UclkSpreadFreq = smc_dpm_table->UclkSpreadFreq; - - /* FCLK Spred Spectrum */ - smc_pptable->FclkSpreadEnabled = smc_dpm_table->FclkSpreadEnabled; - smc_pptable->FclkSpreadPercent = smc_dpm_table->FclkSpreadPercent; - smc_pptable->FclkSpreadFreq = smc_dpm_table->FclkSpreadFreq; - - /* Memory Config */ - smc_pptable->MemoryChannelEnabled = smc_dpm_table->MemoryChannelEnabled; - smc_pptable->DramBitWidth = smc_dpm_table->DramBitWidth; - smc_pptable->PaddingMem1[0] = smc_dpm_table->PaddingMem1[0]; - smc_pptable->PaddingMem1[1] = smc_dpm_table->PaddingMem1[1]; - smc_pptable->PaddingMem1[2] = smc_dpm_table->PaddingMem1[2]; - - /* Total board power */ - smc_pptable->TotalBoardPower = smc_dpm_table->TotalBoardPower; - smc_pptable->BoardPowerPadding = smc_dpm_table->BoardPowerPadding; - - /* XGMI Training */ - for (i = 0; i < NUM_XGMI_PSTATE_LEVELS; i++) { - smc_pptable->XgmiLinkSpeed[i] = smc_dpm_table->XgmiLinkSpeed[i]; - smc_pptable->XgmiLinkWidth[i] = smc_dpm_table->XgmiLinkWidth[i]; - smc_pptable->XgmiFclkFreq[i] = smc_dpm_table->XgmiFclkFreq[i]; - smc_pptable->XgmiSocVoltage[i] = smc_dpm_table->XgmiSocVoltage[i]; - } - + sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header)); + return 0; } @@ -682,44 +598,225 @@ static int sienna_cichlid_allocate_dpm_context(struct smu_context *smu) static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) { - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; - PPTable_t *driver_ppt = NULL; - int i; + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + PPTable_t *driver_ppt = smu->smu_table.driver_pptable; + struct smu_11_0_dpm_table *dpm_table; + int ret = 0; - driver_ppt = table_context->driver_pptable; + /* socclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.soc_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_SOCCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_SOCCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.socclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.soc_table.min = driver_ppt->FreqTableSocclk[0]; - dpm_context->dpm_tables.soc_table.max = driver_ppt->FreqTableSocclk[NUM_SOCCLK_DPM_LEVELS - 1]; + /* gfxclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.gfx_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_GFXCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_GFXCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100; + 
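[Editor's note] sienna_cichlid_append_powerplay_table() above replaces the long field-by-field copy with a single memcpy sized as sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header). That only works if the pptable's board-parameter block, starting at I2cControllers, has exactly the same layout as the DPM-info table minus its header. A standalone sketch of the idiom with simplified stand-in structs (field names and sizes are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct table_header { uint16_t structuresize; uint16_t format_revision; };

struct smc_dpm_info {
	struct table_header table_header;
	/* board-parameter block: must match the pptable's layout exactly */
	uint32_t i2c_controllers[2];
	uint8_t  vdd_gfx_vr_mapping;
	uint8_t  pad[3];
	uint32_t gfx_max_current;
};

struct pptable_body {	/* same block, no header */
	uint32_t i2c_controllers[2];
	uint8_t  vdd_gfx_vr_mapping;
	uint8_t  pad[3];
	uint32_t gfx_max_current;
};

int main(void)
{
	struct smc_dpm_info src = {
		.i2c_controllers = { 1, 2 },
		.vdd_gfx_vr_mapping = 7,
		.gfx_max_current = 250,
	};
	struct pptable_body dst;

	/* Copy everything after the header in one shot, as the patch does. */
	memcpy(&dst, &src.i2c_controllers,
	       sizeof(src) - sizeof(src.table_header));

	printf("vr mapping %u, max current %u\n",
	       (unsigned)dst.vdd_gfx_vr_mapping, (unsigned)dst.gfx_max_current);
	return 0;
}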
dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.gfx_table.min = driver_ppt->FreqTableGfx[0]; - dpm_context->dpm_tables.gfx_table.max = driver_ppt->FreqTableGfx[NUM_GFXCLK_DPM_LEVELS - 1]; + /* uclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.uclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_UCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_UCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.uclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.uclk_table.min = driver_ppt->FreqTableUclk[0]; - dpm_context->dpm_tables.uclk_table.max = driver_ppt->FreqTableUclk[NUM_UCLK_DPM_LEVELS - 1]; + /* fclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.fclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_FCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_FCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_FCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.fclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.vclk_table.min = driver_ppt->FreqTableVclk[0]; - dpm_context->dpm_tables.vclk_table.max = driver_ppt->FreqTableVclk[NUM_VCLK_DPM_LEVELS - 1]; + /* vclk0 dpm table setup */ + dpm_table = &dpm_context->dpm_tables.vclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_VCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_VCLK_0].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.dclk_table.min = driver_ppt->FreqTableDclk[0]; - dpm_context->dpm_tables.dclk_table.max = driver_ppt->FreqTableDclk[NUM_DCLK_DPM_LEVELS - 1]; + /* vclk1 dpm table setup */ + dpm_table = &dpm_context->dpm_tables.vclk1_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_VCLK1, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_VCLK_1].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.vclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.dcef_table.min = driver_ppt->FreqTableDcefclk[0]; - dpm_context->dpm_tables.dcef_table.max = driver_ppt->FreqTableDcefclk[NUM_DCEFCLK_DPM_LEVELS - 1]; + /* dclk0 dpm table setup */ + dpm_table = &dpm_context->dpm_tables.dclk_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DCLK, + dpm_table); 
+ if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DCLK_0].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.pixel_table.min = driver_ppt->FreqTablePixclk[0]; - dpm_context->dpm_tables.pixel_table.max = driver_ppt->FreqTablePixclk[NUM_PIXCLK_DPM_LEVELS - 1]; + /* dclk1 dpm table setup */ + dpm_table = &dpm_context->dpm_tables.dclk1_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DCLK1, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DCLK_1].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.display_table.min = driver_ppt->FreqTableDispclk[0]; - dpm_context->dpm_tables.display_table.max = driver_ppt->FreqTableDispclk[NUM_DISPCLK_DPM_LEVELS - 1]; + /* dcefclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.dcef_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DCEFCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DCEFCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; - dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + /* pixelclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.pixel_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_PIXCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_PIXCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } - for (i = 0; i < MAX_PCIE_CONF; i++) { - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + /* displayclk dpm table setup */ + dpm_table = &dpm_context->dpm_tables.display_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_DISPCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_DISPCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; + } + + /* phyclk dpm table setup */ + dpm_table = 
&dpm_context->dpm_tables.phy_table; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT)) { + ret = smu_v11_0_set_single_dpm_table(smu, + SMU_PHYCLK, + dpm_table); + if (ret) + return ret; + dpm_table->is_fine_grained = + !driver_ppt->DpmDescriptor[PPCLK_PHYCLK].SnapToDiscrete; + } else { + dpm_table->count = 1; + dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.dcefclk / 100; + dpm_table->dpm_levels[0].enabled = true; + dpm_table->min = dpm_table->dpm_levels[0].value; + dpm_table->max = dpm_table->dpm_levels[0].value; } return 0; @@ -729,6 +826,8 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; + struct amdgpu_device *adev = smu->adev; + int ret = 0; if (enable) { @@ -737,9 +836,12 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0, NULL); if (ret) return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, 0x10000, NULL); - if (ret) - return ret; + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerUpVcn, + 0x10000, NULL); + if (ret) + return ret; + } } power_gate->vcn_gated = false; } else { @@ -747,9 +849,12 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enabl ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, 0, NULL); if (ret) return ret; - ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, 0x10000, NULL); - if (ret) - return ret; + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_PowerDownVcn, + 0x10000, NULL); + if (ret) + return ret; + } } power_gate->vcn_gated = true; } @@ -866,24 +971,21 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_FCLK: case SMU_DCEFCLK: - ret = smu_get_current_clk_freq(smu, clk_type, &cur_value); + ret = sienna_cichlid_get_current_clk_freq_by_table(smu, clk_type, &cur_value); if (ret) goto print_clk_out; - /* 10KHz -> MHz */ - cur_value = cur_value / 100; - /* no need to disable gfxoff when retrieving the current gfxclk */ if ((clk_type == SMU_GFXCLK) || (clk_type == SMU_SCLK)) amdgpu_gfx_off_ctrl(adev, false); - ret = smu_get_dpm_level_count(smu, clk_type, &count); + ret = smu_v11_0_get_dpm_level_count(smu, clk_type, &count); if (ret) goto print_clk_out; if (!sienna_cichlid_is_support_fine_grained_dpm(smu, clk_type)) { for (i = 0; i < count; i++) { - ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, i, &value); if (ret) goto print_clk_out; @@ -891,10 +993,10 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, cur_value == value ? "*" : ""); } } else { - ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]); if (ret) goto print_clk_out; - ret = smu_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]); if (ret) goto print_clk_out; @@ -973,15 +1075,15 @@ static int sienna_cichlid_force_clk_levels(struct smu_context *smu, soft_min_level = (soft_min_level >= 1 ? 
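[Editor's note] sienna_cichlid_set_default_dpm_table() repeats one pattern per clock domain: if the clock's DPM feature bit is enabled, pull the level list from firmware via smu_v11_0_set_single_dpm_table() and record whether the descriptor is fine-grained; otherwise synthesize a one-level table from the boot clock. A sketch of the fallback half, with a simplified table type (the real smu_11_0_dpm_table carries more fields):

#include <stdint.h>
#include <stdio.h>

#define MAX_DPM_LEVELS 16

struct dpm_level { uint32_t value; int enabled; };

struct dpm_table {
	uint32_t count;
	uint32_t min;	/* MHz */
	uint32_t max;	/* MHz */
	int is_fine_grained;
	struct dpm_level dpm_levels[MAX_DPM_LEVELS];
};

/*
 * Fallback used when a clock's DPM feature is disabled: a single level
 * holding the boot clock (stored in 10 kHz units, hence / 100).
 */
static void single_level_table(struct dpm_table *t, uint32_t boot_10khz)
{
	t->count = 1;
	t->dpm_levels[0].value = boot_10khz / 100;
	t->dpm_levels[0].enabled = 1;
	t->min = t->dpm_levels[0].value;
	t->max = t->dpm_levels[0].value;
	t->is_fine_grained = 0;
}

int main(void)
{
	struct dpm_table soc = { 0 };

	single_level_table(&soc, 96000);	/* e.g. a 960 MHz boot socclk */
	printf("socclk: %u level(s), %u..%u MHz\n",
	       (unsigned)soc.count, (unsigned)soc.min, (unsigned)soc.max);
	return 0;
}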
1 : 0); } - ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) goto forec_level_out; - ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); + ret = smu_v11_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq); if (ret) goto forec_level_out; - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); + ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); if (ret) goto forec_level_out; break; @@ -998,22 +1100,27 @@ forec_level_out: static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu) { - int ret = 0; - uint32_t min_sclk_freq = 0, min_mclk_freq = 0; - - ret = smu_get_dpm_freq_range(smu, SMU_SCLK, &min_sclk_freq, NULL, false); - if (ret) - return ret; - - smu->pstate_sclk = min_sclk_freq * 100; + struct smu_11_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_11_0_dpm_table *gfx_table = + &dpm_context->dpm_tables.gfx_table; + struct smu_11_0_dpm_table *mem_table = + &dpm_context->dpm_tables.uclk_table; + struct smu_11_0_dpm_table *soc_table = + &dpm_context->dpm_tables.soc_table; + struct smu_umd_pstate_table *pstate_table = + &smu->pstate_table; + + pstate_table->gfxclk_pstate.min = gfx_table->min; + pstate_table->gfxclk_pstate.peak = gfx_table->max; + + pstate_table->uclk_pstate.min = mem_table->min; + pstate_table->uclk_pstate.peak = mem_table->max; + + pstate_table->socclk_pstate.min = soc_table->min; + pstate_table->socclk_pstate.peak = soc_table->max; - ret = smu_get_dpm_freq_range(smu, SMU_MCLK, &min_mclk_freq, NULL, false); - if (ret) - return ret; - - smu->pstate_mclk = min_mclk_freq * 100; - - return ret; + return 0; } static int sienna_cichlid_pre_display_config_changed(struct smu_context *smu) @@ -1030,10 +1137,10 @@ static int sienna_cichlid_pre_display_config_changed(struct smu_context *smu) #endif if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = smu_get_dpm_freq_range(smu, SMU_UCLK, NULL, &max_freq, false); + ret = smu_v11_0_get_dpm_ultimate_freq(smu, SMU_UCLK, NULL, &max_freq); if (ret) return ret; - ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, max_freq); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, 0, max_freq); if (ret) return ret; } @@ -1060,59 +1167,6 @@ static int sienna_cichlid_display_config_changed(struct smu_context *smu) return ret; } -static int sienna_cichlid_force_dpm_limit_value(struct smu_context *smu, bool highest) -{ - int ret = 0, i = 0; - uint32_t min_freq, max_freq, force_freq; - enum smu_clk_type clk_type; - - enum smu_clk_type clks[] = { - SMU_GFXCLK, - SMU_MCLK, - SMU_SOCCLK, - }; - - for (i = 0; i < ARRAY_SIZE(clks); i++) { - clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - force_freq = highest ? 
max_freq : min_freq; - ret = smu_set_soft_freq_range(smu, clk_type, force_freq, force_freq, false); - if (ret) - return ret; - } - - return ret; -} - -static int sienna_cichlid_unforce_dpm_levels(struct smu_context *smu) -{ - int ret = 0, i = 0; - uint32_t min_freq, max_freq; - enum smu_clk_type clk_type; - - enum smu_clk_type clks[] = { - SMU_GFXCLK, - SMU_MCLK, - SMU_SOCCLK, - }; - - for (i = 0; i < ARRAY_SIZE(clks); i++) { - clk_type = clks[i]; - ret = smu_get_dpm_freq_range(smu, clk_type, &min_freq, &max_freq, false); - if (ret) - return ret; - - ret = smu_set_soft_freq_range(smu, clk_type, min_freq, max_freq, false); - if (ret) - return ret; - } - - return ret; -} - static int sienna_cichlid_get_gpu_power(struct smu_context *smu, uint32_t *value) { if (!value) @@ -1365,50 +1419,6 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * return ret; } -static int sienna_cichlid_get_profiling_clk_mask(struct smu_context *smu, - enum amd_dpm_forced_level level, - uint32_t *sclk_mask, - uint32_t *mclk_mask, - uint32_t *soc_mask) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - uint32_t level_count = 0; - - if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { - if (sclk_mask) - *sclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { - if (mclk_mask) - *mclk_mask = 0; - } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { - if(sclk_mask) { - amdgpu_gfx_off_ctrl(adev, false); - ret = smu_get_dpm_level_count(smu, SMU_SCLK, &level_count); - amdgpu_gfx_off_ctrl(adev, true); - if (ret) - return ret; - *sclk_mask = level_count - 1; - } - - if(mclk_mask) { - ret = smu_get_dpm_level_count(smu, SMU_MCLK, &level_count); - if (ret) - return ret; - *mclk_mask = level_count - 1; - } - - if(soc_mask) { - ret = smu_get_dpm_level_count(smu, SMU_SOCCLK, &level_count); - if (ret) - return ret; - *soc_mask = level_count - 1; - } - } - - return ret; -} - static int sienna_cichlid_notify_smc_display_config(struct smu_context *smu) { struct smu_clocks min_clocks = {0}; @@ -1441,7 +1451,7 @@ static int sienna_cichlid_notify_smc_display_config(struct smu_context *smu) } if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT)) { - ret = smu_set_hard_freq_range(smu, SMU_UCLK, min_clocks.memory_clock/100, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, min_clocks.memory_clock/100, 0); if (ret) { dev_err(smu->adev->dev, "[%s] Set hard min uclk failed!", __func__); return ret; @@ -1588,8 +1598,23 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu, ret = sienna_cichlid_thermal_get_temperature(smu, sensor, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data); + *size = 4; + break; default: - ret = smu_v11_0_read_sensor(smu, sensor, data, size); + ret = -EOPNOTSUPP; + break; } mutex_unlock(&smu->sensor_lock); @@ -1625,93 +1650,32 @@ static int sienna_cichlid_get_uclk_dpm_states(struct smu_context *smu, uint32_t return 0; } -static int sienna_cichlid_set_performance_level(struct smu_context *smu, - enum amd_dpm_forced_level level); - -static int sienna_cichlid_set_standard_performance_level(struct smu_context *smu) -{ - 
struct amdgpu_device *adev = smu->adev; - int ret = 0; - uint32_t sclk_freq = 0, uclk_freq = 0; - - switch (adev->asic_type) { - /* TODO: need to set specify clk value by asic type, not support yet*/ - default: - /* by default, this is same as auto performance level */ - return sienna_cichlid_set_performance_level(smu, AMD_DPM_FORCED_LEVEL_AUTO); - } - - ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq, false); - if (ret) - return ret; - ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq, false); - if (ret) - return ret; - - return ret; -} - -static int sienna_cichlid_set_peak_performance_level(struct smu_context *smu) -{ - int ret = 0; - - /* TODO: not support yet*/ - return ret; -} - -static int sienna_cichlid_set_performance_level(struct smu_context *smu, - enum amd_dpm_forced_level level) -{ - int ret = 0; - uint32_t sclk_mask, mclk_mask, soc_mask; - - switch (level) { - case AMD_DPM_FORCED_LEVEL_HIGH: - ret = smu_force_dpm_limit_value(smu, true); - break; - case AMD_DPM_FORCED_LEVEL_LOW: - ret = smu_force_dpm_limit_value(smu, false); - break; - case AMD_DPM_FORCED_LEVEL_AUTO: - ret = smu_unforce_dpm_levels(smu); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: - ret = sienna_cichlid_set_standard_performance_level(smu); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: - case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: - ret = smu_get_profiling_clk_mask(smu, level, - &sclk_mask, - &mclk_mask, - &soc_mask); - if (ret) - return ret; - smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); - smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); - smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); - break; - case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: - ret = sienna_cichlid_set_peak_performance_level(smu); - break; - case AMD_DPM_FORCED_LEVEL_MANUAL: - case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: - default: - break; - } - return ret; -} - static int sienna_cichlid_get_thermal_temperature_range(struct smu_context *smu, struct smu_temperature_range *range) { struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_7_powerplay_table *powerplay_table = table_context->power_play_table; + struct smu_11_0_7_powerplay_table *powerplay_table = + table_context->power_play_table; + PPTable_t *pptable = smu->smu_table.driver_pptable; - if (!range || !powerplay_table) + if (!range) return -EINVAL; - range->max = powerplay_table->software_shutdown_temp * + memcpy(range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range)); + + range->max = pptable->TemperatureLimit[TEMP_EDGE] * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->edge_emergency_max = (pptable->TemperatureLimit[TEMP_EDGE] + CTF_OFFSET_EDGE) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_crit_max = pptable->TemperatureLimit[TEMP_HOTSPOT] * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_emergency_max = (pptable->TemperatureLimit[TEMP_HOTSPOT] + CTF_OFFSET_HOTSPOT) * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_crit_max = pptable->TemperatureLimit[TEMP_MEM] * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_emergency_max = (pptable->TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->software_shutdown_temp = powerplay_table->software_shutdown_temp; return 0; } @@ -1730,9 +1694,9 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return 0; if(disable_memory_clock_switch) - ret = smu_set_hard_freq_range(smu, SMU_UCLK, max_memory_clock, 0); + ret = 
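[Editor's note] The rewritten sienna_cichlid_get_thermal_temperature_range() above derives each trip point from the firmware pptable: the critical limit is the sensor's TemperatureLimit scaled by SMU_TEMPERATURE_UNITS_PER_CENTIGRADES (1000 in this driver, i.e. millidegrees), and the emergency limit adds a per-sensor CTF offset before scaling. A standalone sketch of that arithmetic (the offset value here is illustrative; the real ones come from the ASIC headers):

#include <stdio.h>

#define UNITS_PER_CENTIGRADE 1000	/* millidegrees Celsius */
#define CTF_OFFSET_HOTSPOT   5		/* illustrative CTF offset */

struct temp_range {
	int crit_max;		/* in millidegrees */
	int emergency_max;
};

static void fill_range(struct temp_range *r, int limit_celsius, int ctf_offset)
{
	r->crit_max = limit_celsius * UNITS_PER_CENTIGRADE;
	r->emergency_max = (limit_celsius + ctf_offset) * UNITS_PER_CENTIGRADE;
}

int main(void)
{
	struct temp_range hotspot;

	fill_range(&hotspot, 110, CTF_OFFSET_HOTSPOT);
	printf("hotspot: crit %d mC, emergency %d mC\n",
	       hotspot.crit_max, hotspot.emergency_max);
	return 0;
}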
smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, max_memory_clock, 0); else - ret = smu_set_hard_freq_range(smu, SMU_UCLK, min_memory_clock, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, SMU_UCLK, min_memory_clock, 0); if(!ret) smu->disable_uclk_switch = disable_memory_clock_switch; @@ -1775,12 +1739,16 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { + struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; PPTable_t *pptable = smu->smu_table.driver_pptable; - int ret, i; uint32_t smu_pcie_arg; + int ret, i; - struct smu_dpm_context *smu_dpm = &smu->smu_dpm; - struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + /* lclk dpm table setup */ + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pptable->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pptable->PcieLaneCount[i]; + } for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | @@ -1824,63 +1792,38 @@ static int sienna_cichlid_get_dpm_ultimate_freq(struct smu_context *smu, return ret; } -static int sienna_cichlid_set_soft_freq_limited_range(struct smu_context *smu, - enum smu_clk_type clk_type, - uint32_t min, uint32_t max) -{ - struct amdgpu_device *adev = smu->adev; - int ret; - - if (clk_type == SMU_GFXCLK) - amdgpu_gfx_off_ctrl(adev, false); - ret = smu_v11_0_set_soft_freq_limited_range(smu, clk_type, min, max); - if (clk_type == SMU_GFXCLK) - amdgpu_gfx_off_ctrl(adev, true); - - return ret; -} - static bool sienna_cichlid_is_baco_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; uint32_t val; - if (!smu_v11_0_baco_is_support(smu)) + if (amdgpu_sriov_vf(adev) || (!smu_v11_0_baco_is_support(smu))) return false; val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); return (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? 
true : false; } -static int sienna_cichlid_set_thermal_range(struct smu_context *smu, - struct smu_temperature_range range) +static bool sienna_cichlid_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - int low = SMU_THERMAL_MINIMUM_ALERT_TEMP; - int high = SMU_THERMAL_MAXIMUM_ALERT_TEMP; uint32_t val; - struct smu_table_context *table_context = &smu->smu_table; - struct smu_11_0_7_powerplay_table *powerplay_table = table_context->power_play_table; - - low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, - range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); - high = min((uint16_t)SMU_THERMAL_MAXIMUM_ALERT_TEMP, powerplay_table->software_shutdown_temp); - - if (low > high) - return -EINVAL; - - val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); - val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); - val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); - - WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); + u32 smu_version; + + /* + * The SRIOV environment does not support SMU mode1 reset. + * PM FW supports mode1 reset from version 58.26. + */ + smu_get_smc_version(smu, NULL, &smu_version); + if (amdgpu_sriov_vf(adev) || (smu_version < 0x003a1a00)) + return false; - return 0; + /* + * Mode1 reset relies on PSP, so we should check + * that PSP is alive. + */ + val = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + return val != 0x0; } static void sienna_cichlid_dump_pptable(struct smu_context *smu) @@ -2402,7 +2345,6 @@ static void sienna_cichlid_dump_pptable(struct smu_context *smu) dev_info(smu->adev->dev, "SkuReserved[11] = 0x%x\n", pptable->SkuReserved[11]); dev_info(smu->adev->dev, "SkuReserved[12] = 0x%x\n", pptable->SkuReserved[12]); dev_info(smu->adev->dev, "SkuReserved[13] = 0x%x\n", pptable->SkuReserved[13]); - dev_info(smu->adev->dev, "SkuReserved[14] = 0x%x\n", pptable->SkuReserved[14]); dev_info(smu->adev->dev, "GamingClk[0] = 0x%x\n", pptable->GamingClk[0]); dev_info(smu->adev->dev, "GamingClk[1] = 0x%x\n", pptable->GamingClk[1]); @@ -2489,8 +2431,7 @@ static void sienna_cichlid_dump_pptable(struct smu_context *smu) dev_info(smu->adev->dev, "DfllGfxclkSpreadPercent = 0x%x\n", pptable->DfllGfxclkSpreadPercent); dev_info(smu->adev->dev, "DfllGfxclkSpreadFreq = 0x%x\n", pptable->DfllGfxclkSpreadFreq); - dev_info(smu->adev->dev, "UclkSpreadEnabled = 0x%x\n", pptable->UclkSpreadEnabled); - dev_info(smu->adev->dev, "UclkSpreadPercent = 0x%x\n", pptable->UclkSpreadPercent); + dev_info(smu->adev->dev, "UclkSpreadPadding = 0x%x\n", pptable->UclkSpreadPadding); dev_info(smu->adev->dev, "UclkSpreadFreq = 0x%x\n", pptable->UclkSpreadFreq); dev_info(smu->adev->dev, "FclkSpreadEnabled = 0x%x\n", pptable->FclkSpreadEnabled); @@ -2535,10 +2476,6 @@ static void sienna_cichlid_dump_pptable(struct smu_context *smu) dev_info(smu->adev->dev, "BoardReserved[8] = 0x%x\n", pptable->BoardReserved[8]); dev_info(smu->adev->dev, "BoardReserved[9] = 0x%x\n", pptable->BoardReserved[9]); dev_info(smu->adev->dev, "BoardReserved[10] = 0x%x\n", pptable->BoardReserved[10]); - dev_info(smu->adev->dev, "BoardReserved[11] = 0x%x\n", pptable->BoardReserved[11]); - dev_info(smu->adev->dev, "BoardReserved[12] = 0x%x\n",
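[Editor's note] sienna_cichlid_is_mode1_reset_supported() above gates on smu_version < 0x003a1a00, which the comment equates with PM FW 58.26. That matches a byte-packed version number with the major in bits 23:16 and the minor in bits 15:8 (0x3a = 58, 0x1a = 26); the exact packing is an inference from the comment, not spelled out in the patch. A sketch of the decode and the resulting integer comparison:

#include <stdint.h>
#include <stdio.h>

/*
 * Assumed packing, consistent with 0x003a1a00 == "58.26":
 * bits 23:16 major, bits 15:8 minor, bits 7:0 debug/patch.
 */
static void decode_smu_version(uint32_t v, unsigned *maj, unsigned *min, unsigned *dbg)
{
	*maj = (v >> 16) & 0xff;
	*min = (v >> 8) & 0xff;
	*dbg = v & 0xff;
}

int main(void)
{
	unsigned maj, min, dbg;

	decode_smu_version(0x003a1a00, &maj, &min, &dbg);
	printf("PM FW %u.%u.%u\n", maj, min, dbg);	/* 58.26.0 */

	/* The support check then reduces to a plain integer compare. */
	printf("mode1 reset gate: %s\n",
	       0x003a1b00 >= 0x003a1a00 ? "supported" : "too old");
	return 0;
}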
pptable->BoardReserved[12]); - dev_info(smu->adev->dev, "BoardReserved[13] = 0x%x\n", pptable->BoardReserved[13]); - dev_info(smu->adev->dev, "BoardReserved[14] = 0x%x\n", pptable->BoardReserved[14]); dev_info(smu->adev->dev, "MmHubPadding[0] = 0x%x\n", pptable->MmHubPadding[0]); dev_info(smu->adev->dev, "MmHubPadding[1] = 0x%x\n", pptable->MmHubPadding[1]); @@ -2563,25 +2500,21 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .set_default_dpm_table = sienna_cichlid_set_default_dpm_table, .dpm_set_vcn_enable = sienna_cichlid_dpm_set_vcn_enable, .dpm_set_jpeg_enable = sienna_cichlid_dpm_set_jpeg_enable, - .get_current_clk_freq_by_table = sienna_cichlid_get_current_clk_freq_by_table, .print_clk_levels = sienna_cichlid_print_clk_levels, .force_clk_levels = sienna_cichlid_force_clk_levels, .populate_umd_state_clk = sienna_cichlid_populate_umd_state_clk, .pre_display_config_changed = sienna_cichlid_pre_display_config_changed, .display_config_changed = sienna_cichlid_display_config_changed, .notify_smc_display_config = sienna_cichlid_notify_smc_display_config, - .force_dpm_limit_value = sienna_cichlid_force_dpm_limit_value, - .unforce_dpm_levels = sienna_cichlid_unforce_dpm_levels, .is_dpm_running = sienna_cichlid_is_dpm_running, .get_fan_speed_percent = sienna_cichlid_get_fan_speed_percent, .get_fan_speed_rpm = sienna_cichlid_get_fan_speed_rpm, .get_power_profile_mode = sienna_cichlid_get_power_profile_mode, .set_power_profile_mode = sienna_cichlid_set_power_profile_mode, - .get_profiling_clk_mask = sienna_cichlid_get_profiling_clk_mask, .set_watermarks_table = sienna_cichlid_set_watermarks_table, .read_sensor = sienna_cichlid_read_sensor, .get_uclk_dpm_states = sienna_cichlid_get_uclk_dpm_states, - .set_performance_level = sienna_cichlid_set_performance_level, + .set_performance_level = smu_v11_0_set_performance_level, .get_thermal_temperature_range = sienna_cichlid_get_thermal_temperature_range, .display_disable_memory_clock_switch = sienna_cichlid_display_disable_memory_clock_switch, .get_power_limit = sienna_cichlid_get_power_limit, @@ -2608,7 +2541,6 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_enabled_mask = smu_v11_0_get_enabled_mask, .notify_display_change = NULL, .set_power_limit = smu_v11_0_set_power_limit, - .get_current_clk_freq = smu_v11_0_get_current_clk_freq, .init_max_sustainable_clocks = smu_v11_0_init_max_sustainable_clocks, .enable_thermal_alert = smu_v11_0_enable_thermal_alert, .disable_thermal_alert = smu_v11_0_disable_thermal_alert, @@ -2628,10 +2560,10 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .baco_set_state = smu_v11_0_baco_set_state, .baco_enter = smu_v11_0_baco_enter, .baco_exit = smu_v11_0_baco_exit, + .mode1_reset_is_support = sienna_cichlid_is_mode1_reset_supported, + .mode1_reset = smu_v11_0_mode1_reset, .get_dpm_ultimate_freq = sienna_cichlid_get_dpm_ultimate_freq, - .set_soft_freq_limited_range = sienna_cichlid_set_soft_freq_limited_range, - .override_pcie_parameters = smu_v11_0_override_pcie_parameters, - .set_thermal_range = sienna_cichlid_set_thermal_range, + .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index afd786b6c0b4..afb3ef874fc5 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -59,9 +59,7 @@ #define smu_set_default_dpm_table(smu) 
smu_ppt_funcs(set_default_dpm_table, 0, smu) #define smu_populate_umd_state_clk(smu) smu_ppt_funcs(populate_umd_state_clk, 0, smu) #define smu_set_default_od8_settings(smu) smu_ppt_funcs(set_default_od8_settings, 0, smu) -#define smu_get_current_clk_freq(smu, clk_id, value) smu_ppt_funcs(get_current_clk_freq, 0, smu, clk_id, value) #define smu_tables_init(smu, tab) smu_ppt_funcs(tables_init, 0, smu, tab) -#define smu_set_thermal_fan_table(smu) smu_ppt_funcs(set_thermal_fan_table, 0, smu) #define smu_enable_thermal_alert(smu) smu_ppt_funcs(enable_thermal_alert, 0, smu) #define smu_disable_thermal_alert(smu) smu_ppt_funcs(disable_thermal_alert, 0, smu) #define smu_smc_read_sensor(smu, sensor, data, size) smu_ppt_funcs(read_sensor, -EINVAL, smu, sensor, data, size) @@ -69,15 +67,13 @@ #define smu_display_config_changed(smu) smu_ppt_funcs(display_config_changed, 0 , smu) #define smu_apply_clocks_adjust_rules(smu) smu_ppt_funcs(apply_clocks_adjust_rules, 0, smu) #define smu_notify_smc_display_config(smu) smu_ppt_funcs(notify_smc_display_config, 0, smu) -#define smu_force_dpm_limit_value(smu, highest) smu_ppt_funcs(force_dpm_limit_value, 0, smu, highest) -#define smu_unforce_dpm_levels(smu) smu_ppt_funcs(unforce_dpm_levels, 0, smu) #define smu_set_cpu_power_state(smu) smu_ppt_funcs(set_cpu_power_state, 0, smu) #define smu_msg_get_index(smu, msg) smu_ppt_funcs(get_smu_msg_index, -EINVAL, smu, msg) #define smu_clk_get_index(smu, clk) smu_ppt_funcs(get_smu_clk_index, -EINVAL, smu, clk) #define smu_feature_get_index(smu, fea) smu_ppt_funcs(get_smu_feature_index, -EINVAL, smu, fea) #define smu_table_get_index(smu, tab) smu_ppt_funcs(get_smu_table_index, -EINVAL, smu, tab) #define smu_power_get_index(smu, src) smu_ppt_funcs(get_smu_power_index, -EINVAL, smu, src) -#define smu_workload_get_type(smu, type) smu_ppt_funcs(get_smu_power_index, -EINVAL, smu, type) +#define smu_workload_get_type(smu, type) smu_ppt_funcs(get_workload_type, -EINVAL, smu, type) #define smu_run_btc(smu) smu_ppt_funcs(run_btc, 0, smu) #define smu_get_allowed_feature_mask(smu, feature_mask, num) smu_ppt_funcs(get_allowed_feature_mask, 0, smu, feature_mask, num) #define smu_store_cc6_data(smu, st, cc6_dis, pst_dis, pst_sw_dis) smu_ppt_funcs(store_cc6_data, 0, smu, st, cc6_dis, pst_dis, pst_sw_dis) @@ -87,18 +83,12 @@ #define smu_dpm_set_vcn_enable(smu, enable) smu_ppt_funcs(dpm_set_vcn_enable, 0, smu, enable) #define smu_dpm_set_jpeg_enable(smu, enable) smu_ppt_funcs(dpm_set_jpeg_enable, 0, smu, enable) #define smu_set_watermarks_table(smu, tab, clock_ranges) smu_ppt_funcs(set_watermarks_table, 0, smu, tab, clock_ranges) -#define smu_get_current_clk_freq_by_table(smu, clk_type, value) smu_ppt_funcs(get_current_clk_freq_by_table, 0, smu, clk_type, value) #define smu_thermal_temperature_range_update(smu, range, rw) smu_ppt_funcs(thermal_temperature_range_update, 0, smu, range, rw) -#define smu_get_thermal_temperature_range(smu, range) smu_ppt_funcs(get_thermal_temperature_range, 0, smu, range) #define smu_register_irq_handler(smu) smu_ppt_funcs(register_irq_handler, 0, smu) #define smu_get_dpm_ultimate_freq(smu, param, min, max) smu_ppt_funcs(get_dpm_ultimate_freq, 0, smu, param, min, max) #define smu_asic_set_performance_level(smu, level) smu_ppt_funcs(set_performance_level, -EINVAL, smu, level) #define smu_dump_pptable(smu) smu_ppt_funcs(dump_pptable, 0, smu) -#define smu_get_dpm_clk_limited(smu, clk_type, dpm_level, freq) smu_ppt_funcs(get_dpm_clk_limited, -EINVAL, smu, clk_type, dpm_level, freq) -#define 
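[Editor's note] One smu_internal.h change above deserves a call-out: smu_workload_get_type() previously dispatched to get_smu_power_index rather than get_workload_type, so workload lookups went through the wrong callback. The smu_ppt_funcs() wrapper style makes this easy to miss, since every line looks alike and any type-compatible member name compiles. A toy illustration of the hazard with a function-pointer table (types and values are invented for the demo):

#include <stdio.h>

/* Toy ops table standing in for pptable_funcs. */
struct ops {
	int (*get_power_index)(int src);
	int (*get_workload_type)(int profile);
};

static int power_index(int src)       { return src + 100; }
static int workload_type(int profile) { return profile + 200; }

/* Dispatch macro in the style of smu_ppt_funcs(): the member name is spliced in. */
#define CALL(ops, fn, arg) ((ops)->fn ? (ops)->fn(arg) : -22 /* -EINVAL */)

int main(void)
{
	struct ops o = { power_index, workload_type };

	/* Both calls compile; only the member name says which one is right. */
	printf("buggy : %d\n", CALL(&o, get_power_index, 1));   /* 101 */
	printf("fixed : %d\n", CALL(&o, get_workload_type, 1)); /* 201 */
	return 0;
}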
smu_set_soft_freq_limited_range(smu, clk_type, min, max) smu_ppt_funcs(set_soft_freq_limited_range, -EINVAL, smu, clk_type, min, max) -#define smu_override_pcie_parameters(smu) smu_ppt_funcs(override_pcie_parameters, 0, smu) #define smu_update_pcie_parameters(smu, pcie_gen_cap, pcie_width_cap) smu_ppt_funcs(update_pcie_parameters, 0, smu, pcie_gen_cap, pcie_width_cap) -#define smu_set_thermal_range(smu, range) smu_ppt_funcs(set_thermal_range, 0, smu, range) #define smu_disable_umc_cdr_12gbps_workaround(smu) smu_ppt_funcs(disable_umc_cdr_12gbps_workaround, 0, smu) #define smu_set_power_source(smu, power_src) smu_ppt_funcs(set_power_source, 0, smu, power_src) #define smu_i2c_eeprom_init(smu, control) smu_ppt_funcs(i2c_eeprom_init, 0, smu, control) @@ -106,7 +96,5 @@ #define smu_get_unique_id(smu) smu_ppt_funcs(get_unique_id, 0, smu) #define smu_log_thermal_throttling(smu) smu_ppt_funcs(log_thermal_throttling_event, 0, smu) #define smu_get_asic_power_limits(smu) smu_ppt_funcs(get_power_limit, 0, smu) -#define smu_get_profiling_clk_mask(smu, level, sclk_mask, mclk_mask, soc_mask) \ - smu_ppt_funcs(get_profiling_clk_mask, 0, smu, level, sclk_mask, mclk_mask, soc_mask) #endif diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index f24983a8876d..a7336556dc36 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -35,7 +35,6 @@ #include "smu_v11_0.h" #include "soc15_common.h" #include "atom.h" -#include "amd_pcie.h" #include "amdgpu_ras.h" #include "asic_reg/thm/thm_11_0_2_offset.h" @@ -60,9 +59,12 @@ MODULE_FIRMWARE("amdgpu/navi10_smc.bin"); MODULE_FIRMWARE("amdgpu/navi14_smc.bin"); MODULE_FIRMWARE("amdgpu/navi12_smc.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_smc.bin"); +MODULE_FIRMWARE("amdgpu/navy_flounder_smc.bin"); #define SMU11_VOLTAGE_SCALE 4 +#define SMU11_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms + static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu, uint16_t msg) { @@ -172,8 +174,12 @@ int smu_v11_0_init_microcode(struct smu_context *smu) case CHIP_SIENNA_CICHLID: chip_name = "sienna_cichlid"; break; + case CHIP_NAVY_FLOUNDER: + chip_name = "navy_flounder"; + break; default: - BUG(); + dev_err(adev->dev, "Unsupported ASIC type %d\n", adev->asic_type); + return -EINVAL; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name); @@ -303,6 +309,9 @@ int smu_v11_0_check_fw_version(struct smu_context *smu) case CHIP_SIENNA_CICHLID: smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Sienna_Cichlid; break; + case CHIP_NAVY_FLOUNDER: + smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_Navy_Flounder; + break; default: dev_err(smu->adev->dev, "smu unsupported asic type:%d.\n", smu->adev->asic_type); smu->smc_driver_if_version = SMU11_DRIVER_IF_VERSION_INV; @@ -384,7 +393,8 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; version_major = le16_to_cpu(hdr->header.header_version_major); version_minor = le16_to_cpu(hdr->header.header_version_minor); - if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { + if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) || + adev->asic_type == CHIP_NAVY_FLOUNDER) { dev_info(adev->dev, "use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); switch (version_minor) { case 0: @@ -816,6 +826,11 @@ int smu_v11_0_set_tool_table_location(struct smu_context *smu) int smu_v11_0_init_display_count(struct 
smu_context *smu, uint32_t count) { int ret = 0; + struct amdgpu_device *adev = smu->adev; + + /* Navy_Flounder do not support to change display num currently */ + if (adev->asic_type == CHIP_NAVY_FLOUNDER) + return 0; if (!smu->pm_enabled) return ret; @@ -1083,63 +1098,12 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) return 0; } -int smu_v11_0_get_current_clk_freq(struct smu_context *smu, - enum smu_clk_type clk_id, - uint32_t *value) -{ - int ret = 0; - uint32_t freq = 0; - - if (clk_id >= SMU_CLK_COUNT || !value) - return -EINVAL; - - ret = smu_get_current_clk_freq_by_table(smu, clk_id, &freq); - if (ret) - return ret; - - freq *= 100; - *value = freq; - - return ret; -} - int smu_v11_0_enable_thermal_alert(struct smu_context *smu) { - int ret = 0; - struct smu_temperature_range range; - struct amdgpu_device *adev = smu->adev; - - memcpy(&range, &smu11_thermal_policy[0], sizeof(struct smu_temperature_range)); - - ret = smu_get_thermal_temperature_range(smu, &range); - if (ret) - return ret; + if (smu->smu_table.thermal_controller_type) + return amdgpu_irq_get(smu->adev, &smu->irq_source, 0); - if (smu->smu_table.thermal_controller_type) { - ret = smu_set_thermal_range(smu, range); - if (ret) - return ret; - - ret = amdgpu_irq_get(adev, &smu->irq_source, 0); - if (ret) - return ret; - - ret = smu_set_thermal_fan_table(smu); - if (ret) - return ret; - } - - adev->pm.dpm.thermal.min_temp = range.min; - adev->pm.dpm.thermal.max_temp = range.max; - adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; - adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; - adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; - adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; - adev->pm.dpm.thermal.min_mem_temp = range.mem_min; - adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; - adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; - - return ret; + return 0; } int smu_v11_0_disable_thermal_alert(struct smu_context *smu) @@ -1152,7 +1116,7 @@ static uint16_t convert_to_vddc(uint8_t vid) return (uint16_t) ((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE); } -static int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) +int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) { struct amdgpu_device *adev = smu->adev; uint32_t vdd = 0, val_vid = 0; @@ -1171,39 +1135,6 @@ static int smu_v11_0_get_gfx_vdd(struct smu_context *smu, uint32_t *value) } -int smu_v11_0_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size) -{ - int ret = 0; - - if(!data || !size) - return -EINVAL; - - switch (sensor) { - case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = smu_get_current_clk_freq(smu, SMU_UCLK, (uint32_t *)data); - *size = 4; - break; - case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = smu_get_current_clk_freq(smu, SMU_GFXCLK, (uint32_t *)data); - *size = 4; - break; - case AMDGPU_PP_SENSOR_VDDGFX: - ret = smu_v11_0_get_gfx_vdd(smu, (uint32_t *)data); - *size = 4; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - if (ret) - *size = 0; - - return ret; -} - int smu_v11_0_display_clock_voltage_request(struct smu_context *smu, struct pp_display_clock_request @@ -1244,7 +1175,7 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu, if (clk_select == SMU_UCLK && smu->disable_uclk_switch) return 0; - ret = smu_set_hard_freq_range(smu, clk_select, clk_freq, 0); + ret = smu_v11_0_set_hard_freq_limited_range(smu, clk_select, clk_freq, 0); 
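[Editor's note] convert_to_vddc() above, now exported via smu_v11_0_get_gfx_vdd(), decodes an SVI2 voltage ID: (6200 - vid * 25) / SMU11_VOLTAGE_SCALE with a scale of 4 gives 1550 at VID 0 with steps of 6.25, which matches the SVI2 encoding of 1.55 V minus 6.25 mV per step, so the result appears to be in millivolts. A standalone copy of the conversion:

#include <stdint.h>
#include <stdio.h>

#define SMU11_VOLTAGE_SCALE 4

/* SVI2 VID decode as in the patch: 1550 mV at VID 0, 6.25 mV per step. */
static uint16_t convert_to_vddc(uint8_t vid)
{
	return (uint16_t)((6200 - (vid * 25)) / SMU11_VOLTAGE_SCALE);
}

int main(void)
{
	printf("VID 0x00 -> %u mV\n", (unsigned)convert_to_vddc(0x00));	/* 1550 */
	printf("VID 0x40 -> %u mV\n", (unsigned)convert_to_vddc(0x40));	/* 1150 */
	printf("VID 0x80 -> %u mV\n", (unsigned)convert_to_vddc(0x80));	/* 750 */
	return 0;
}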
if(clk_select == SMU_UCLK) smu->hard_min_uclk_req_from_dal = clk_freq; @@ -1417,6 +1348,8 @@ static int smu_v11_0_set_irq_state(struct amdgpu_device *adev, unsigned tyep, enum amdgpu_interrupt_state state) { + struct smu_context *smu = &adev->smu; + uint32_t low, high; uint32_t val = 0; switch (state) { @@ -1437,9 +1370,19 @@ static int smu_v11_0_set_irq_state(struct amdgpu_device *adev, break; case AMDGPU_IRQ_STATE_ENABLE: /* For THM irqs */ + low = max(SMU_THERMAL_MINIMUM_ALERT_TEMP, + smu->thermal_range.min / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES); + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0); val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high & 0xff)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low & 0xff)); + val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); val = (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); @@ -1746,11 +1689,50 @@ int smu_v11_0_baco_exit(struct smu_context *smu) return ret; } +int smu_v11_0_mode1_reset(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + if (!ret) + msleep(SMU11_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + int smu_v11_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max) { int ret = 0, clk_id = 0; uint32_t param = 0; + uint32_t clock_limit; + + if (!smu_clk_dpm_is_enabled(smu, clk_type)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + clock_limit = smu->smu_table.boot_values.uclk; + break; + case SMU_GFXCLK: + case SMU_SCLK: + clock_limit = smu->smu_table.boot_values.gfxclk; + break; + case SMU_SOCCLK: + clock_limit = smu->smu_table.boot_values.socclk; + break; + default: + clock_limit = 0; + break; + } + + /* clock in Mhz unit */ + if (min) + *min = clock_limit / 100; + if (max) + *max = clock_limit / 100; + + return 0; + } clk_id = smu_clk_get_index(smu, clk_type); if (clk_id < 0) { @@ -1775,9 +1757,12 @@ failed: return ret; } -int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t min, uint32_t max) +int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t min, + uint32_t max) { + struct amdgpu_device *adev = smu->adev; int ret = 0, clk_id = 0; uint32_t param; @@ -1785,12 +1770,15 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ if (clk_id < 0) return clk_id; + if (clk_type == SMU_GFXCLK) + amdgpu_gfx_off_ctrl(adev, false); + if (max > 0) { param = (uint32_t)((clk_id << 16) | (max & 0xffff)); ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxByFreq, param, NULL); if (ret) - return ret; + goto out; } if (min > 0) { @@ -1798,88 +1786,151 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinByFreq, param, NULL); if (ret) - return ret; + goto out; } +out: + if (clk_type == SMU_GFXCLK) + amdgpu_gfx_off_ctrl(adev, true); + return ret; } -int smu_v11_0_override_pcie_parameters(struct smu_context *smu) +int 
smu_v11_0_set_hard_freq_limited_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t min, + uint32_t max) { - struct amdgpu_device *adev = smu->adev; - uint32_t pcie_gen = 0, pcie_width = 0; - int ret; + int ret = 0, clk_id = 0; + uint32_t param; - if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) - pcie_gen = 3; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) - pcie_gen = 2; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) - pcie_gen = 1; - else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) - pcie_gen = 0; - - /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 - * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 - * Bit 7:0: PCIE lane width, 1 to 7 corresponds to x1 to x32 - */ - if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) - pcie_width = 6; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) - pcie_width = 5; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) - pcie_width = 4; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) - pcie_width = 3; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) - pcie_width = 2; - else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) - pcie_width = 1; - - ret = smu_update_pcie_parameters(smu, pcie_gen, pcie_width); + if (min <= 0 && max <= 0) + return -EINVAL; - if (ret) - dev_err(adev->dev, "[%s] Attempt to override pcie params failed!\n", __func__); + if (!smu_clk_dpm_is_enabled(smu, clk_type)) + return 0; - return ret; + clk_id = smu_clk_get_index(smu, clk_type); + if (clk_id < 0) + return clk_id; + if (max > 0) { + param = (uint32_t)((clk_id << 16) | (max & 0xffff)); + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMaxByFreq, + param, NULL); + if (ret) + return ret; + } + + if (min > 0) { + param = (uint32_t)((clk_id << 16) | (min & 0xffff)); + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinByFreq, + param, NULL); + if (ret) + return ret; + } + + return ret; } int smu_v11_0_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level) { + struct smu_11_0_dpm_context *dpm_context = + smu->smu_dpm.dpm_context; + struct smu_11_0_dpm_table *gfx_table = + &dpm_context->dpm_tables.gfx_table; + struct smu_11_0_dpm_table *mem_table = + &dpm_context->dpm_tables.uclk_table; + struct smu_11_0_dpm_table *soc_table = + &dpm_context->dpm_tables.soc_table; + struct smu_umd_pstate_table *pstate_table = + &smu->pstate_table; + struct amdgpu_device *adev = smu->adev; + uint32_t sclk_min = 0, sclk_max = 0; + uint32_t mclk_min = 0, mclk_max = 0; + uint32_t socclk_min = 0, socclk_max = 0; int ret = 0; - uint32_t sclk_mask, mclk_mask, soc_mask; switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: - ret = smu_force_dpm_limit_value(smu, true); + sclk_min = sclk_max = gfx_table->max; + mclk_min = mclk_max = mem_table->max; + socclk_min = socclk_max = soc_table->max; break; case AMD_DPM_FORCED_LEVEL_LOW: - ret = smu_force_dpm_limit_value(smu, false); + sclk_min = sclk_max = gfx_table->min; + mclk_min = mclk_max = mem_table->min; + socclk_min = socclk_max = soc_table->min; break; case AMD_DPM_FORCED_LEVEL_AUTO: + sclk_min = gfx_table->min; + sclk_max = gfx_table->max; + mclk_min = mem_table->min; + mclk_max = mem_table->max; + socclk_min = soc_table->min; + socclk_max = soc_table->max; + break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: - ret = smu_unforce_dpm_levels(smu); + sclk_min = sclk_max = pstate_table->gfxclk_pstate.standard; +
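Both the soft-limit path above and the new hard-limit path encode their SMC message argument the same way: clock id in the upper 16 bits, frequency in MHz in the lower 16 bits (the DPM-level queries added further down reuse the layout with a level index in the low word). A self-checking sketch of that packing; smu_pack_clk_param() is a hypothetical name, not a driver function:

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical helper mirroring the argument packing used by
 * SMU_MSG_SetSoftMin/MaxByFreq and SMU_MSG_SetHardMin/MaxByFreq:
 * clock id in bits 31:16, frequency in MHz in bits 15:0. */
static uint32_t smu_pack_clk_param(int clk_id, uint32_t freq_mhz)
{
	return (uint32_t)((clk_id << 16) | (freq_mhz & 0xffff));
}

int main(void)
{
	/* clk_id 2 at 1850 MHz packs to 0x0002073a */
	uint32_t param = smu_pack_clk_param(2, 1850);

	assert((param >> 16) == 2);
	assert((param & 0xffff) == 1850);
	return 0;
}
```

The 16-bit frequency field is also why the DPM-disabled fallback in smu_v11_0_get_dpm_ultimate_freq() divides the boot clocks by 100: the firmware interface works in MHz, while the VBIOS boot values are stored in 10 kHz units.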
mclk_min = mclk_max = pstate_table->uclk_pstate.standard; + socclk_min = socclk_max = pstate_table->socclk_pstate.standard; break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + sclk_min = sclk_max = pstate_table->gfxclk_pstate.min; + break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + mclk_min = mclk_max = pstate_table->uclk_pstate.min; + break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: - ret = smu_get_profiling_clk_mask(smu, level, - &sclk_mask, - &mclk_mask, - &soc_mask); - if (ret) - return ret; - smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask, false); - smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask, false); - smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask, false); + sclk_min = sclk_max = pstate_table->gfxclk_pstate.peak; + mclk_min = mclk_max = pstate_table->uclk_pstate.peak; + socclk_min = socclk_max = pstate_table->socclk_pstate.peak; break; case AMD_DPM_FORCED_LEVEL_MANUAL: case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: + return 0; default: - break; + dev_err(adev->dev, "Invalid performance level %d\n", level); + return -EINVAL; + } + + /* + * Separate MCLK and SOCCLK soft min/max settings are not allowed + * on Arcturus. + */ + if (adev->asic_type == CHIP_ARCTURUS) { + mclk_min = mclk_max = 0; + socclk_min = socclk_max = 0; + } + + if (sclk_min && sclk_max) { + ret = smu_v11_0_set_soft_freq_limited_range(smu, + SMU_GFXCLK, + sclk_min, + sclk_max); + if (ret) + return ret; + } + + if (mclk_min && mclk_max) { + ret = smu_v11_0_set_soft_freq_limited_range(smu, + SMU_MCLK, + mclk_min, + mclk_max); + if (ret) + return ret; } + + if (socclk_min && socclk_max) { + ret = smu_v11_0_set_soft_freq_limited_range(smu, + SMU_SOCCLK, + socclk_min, + socclk_max); + if (ret) + return ret; + } + return ret; } @@ -1898,3 +1949,125 @@ int smu_v11_0_set_power_source(struct smu_context *smu, NULL); } +int smu_v11_0_get_dpm_freq_by_index(struct smu_context *smu, + enum smu_clk_type clk_type, + uint16_t level, + uint32_t *value) +{ + int ret = 0, clk_id = 0; + uint32_t param; + + if (!value) + return -EINVAL; + + if (!smu_clk_dpm_is_enabled(smu, clk_type)) + return 0; + + clk_id = smu_clk_get_index(smu, clk_type); + if (clk_id < 0) + return clk_id; + + param = (uint32_t)(((clk_id & 0xffff) << 16) | (level & 0xffff)); + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetDpmFreqByIndex, + param, + value); + if (ret) + return ret; + + /* + * BIT31: 0 - Fine grained DPM, 1 - Discrete DPM + * for now, we don't support it + */ + *value = *value & 0x7fffffff; + + return ret; +} + +int smu_v11_0_get_dpm_level_count(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *value) +{ + return smu_v11_0_get_dpm_freq_by_index(smu, + clk_type, + 0xff, + value); +} + +int smu_v11_0_set_single_dpm_table(struct smu_context *smu, + enum smu_clk_type clk_type, + struct smu_11_0_dpm_table *single_dpm_table) +{ + int ret = 0; + uint32_t clk; + int i; + + ret = smu_v11_0_get_dpm_level_count(smu, + clk_type, + &single_dpm_table->count); + if (ret) { + dev_err(smu->adev->dev, "[%s] failed to get dpm levels!\n", __func__); + return ret; + } + + for (i = 0; i < single_dpm_table->count; i++) { + ret = smu_v11_0_get_dpm_freq_by_index(smu, + clk_type, + i, + &clk); + if (ret) { + dev_err(smu->adev->dev, "[%s] failed to get dpm freq by index!\n", __func__); + return ret; + } + + single_dpm_table->dpm_levels[i].value = clk; + single_dpm_table->dpm_levels[i].enabled = true; + + if (i == 0) + single_dpm_table->min = clk; + else if (i == single_dpm_table->count - 1) + single_dpm_table->max = clk; + } + + return 0; +} + +int
smu_v11_0_get_dpm_level_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min_value, + uint32_t *max_value) +{ + uint32_t level_count = 0; + int ret = 0; + + if (!min_value && !max_value) + return -EINVAL; + + if (min_value) { + /* by default, use the level 0 clock value as the min value */ + ret = smu_v11_0_get_dpm_freq_by_index(smu, + clk_type, + 0, + min_value); + if (ret) + return ret; + } + + if (max_value) { + ret = smu_v11_0_get_dpm_level_count(smu, + clk_type, + &level_count); + if (ret) + return ret; + + ret = smu_v11_0_get_dpm_freq_by_index(smu, + clk_type, + level_count - 1, + max_value); + if (ret) + return ret; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index b03127273d56..4e1b11d07438 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -203,35 +203,6 @@ int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) NULL); } -int smu_v12_0_read_sensor(struct smu_context *smu, - enum amd_pp_sensors sensor, - void *data, uint32_t *size) -{ - int ret = 0; - - if(!data || !size) - return -EINVAL; - - switch (sensor) { - case AMDGPU_PP_SENSOR_GFX_MCLK: - ret = smu_get_current_clk_freq(smu, SMU_UCLK, (uint32_t *)data); - *size = 4; - break; - case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = smu_get_current_clk_freq(smu, SMU_GFXCLK, (uint32_t *)data); - *size = 4; - break; - default: - ret = -EOPNOTSUPP; - break; - } - - if (ret) - *size = 0; - - return ret; -} - /** * smu_v12_0_get_gfxoff_status - get gfxoff status * @@ -345,98 +316,6 @@ int smu_v12_0_get_enabled_mask(struct smu_context *smu, return ret; } -int smu_v12_0_get_current_clk_freq(struct smu_context *smu, - enum smu_clk_type clk_id, - uint32_t *value) -{ - int ret = 0; - uint32_t freq = 0; - - if (clk_id >= SMU_CLK_COUNT || !value) - return -EINVAL; - - ret = smu_get_current_clk_freq_by_table(smu, clk_id, &freq); - if (ret) - return ret; - - freq *= 100; - *value = freq; - - return ret; -} - -int smu_v12_0_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, - uint32_t *min, uint32_t *max) -{ - int ret = 0; - uint32_t mclk_mask, soc_mask; - - if (max) { - ret = smu_get_profiling_clk_mask(smu, AMD_DPM_FORCED_LEVEL_PROFILE_PEAK, - NULL, - &mclk_mask, - &soc_mask); - if (ret) - goto failed; - - switch (clk_type) { - case SMU_GFXCLK: - case SMU_SCLK: - ret = smu_send_smc_msg(smu, SMU_MSG_GetMaxGfxclkFrequency, max); - if (ret) { - dev_err(smu->adev->dev, "Attempt to get max GX frequency from SMC Failed !\n"); - goto failed; - } - break; - case SMU_UCLK: - case SMU_FCLK: - case SMU_MCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, mclk_mask, max); - if (ret) - goto failed; - break; - case SMU_SOCCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, soc_mask, max); - if (ret) - goto failed; - break; - default: - ret = -EINVAL; - goto failed; - } - } - - if (min) { - switch (clk_type) { - case SMU_GFXCLK: - case SMU_SCLK: - ret = smu_send_smc_msg(smu, SMU_MSG_GetMinGfxclkFrequency, min); - if (ret) { - dev_err(smu->adev->dev, "Attempt to get min GX frequency from SMC Failed !\n"); - goto failed; - } - break; - case SMU_UCLK: - case SMU_FCLK: - case SMU_MCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min); - if (ret) - goto failed; - break; - case SMU_SOCCLK: - ret = smu_get_dpm_clk_limited(smu, clk_type, 0, min); - if (ret) - goto failed; - break; - default: - ret = -EINVAL; - goto failed; - } - } -failed: - return ret; -} - int smu_v12_0_mode2_reset(struct
smu_context *smu){ return smu_v12_0_send_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, SMU_RESET_MODE_2, NULL); } diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 86ac032275bb..ba20c6f03719 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5563,6 +5563,7 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; + rdev->pm.dpm.num_ps = 0; for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -5572,10 +5573,8 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.power_state[i].clock_info) return -EINVAL; ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(rdev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } rdev->pm.dpm.ps[i].ps_priv = ps; ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info, @@ -5597,8 +5596,8 @@ static int ci_parse_power_table(struct radeon_device *rdev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + rdev->pm.dpm.num_ps = i + 1; } - rdev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 43f2f9307866..8735bf2bb8b5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -865,8 +865,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; - dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", - ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); + dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx\n", + ring, rdev->fence_drv[ring].gpu_addr); return 0; } diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h index 0c5bd1134460..8712e14991ed 100644 --- a/include/drm/amd_asic_type.h +++ b/include/drm/amd_asic_type.h @@ -55,6 +55,7 @@ enum amd_asic_type { CHIP_NAVI14, /* 26 */ CHIP_NAVI12, /* 27 */ CHIP_SIENNA_CICHLID, /* 28 */ + CHIP_NAVY_FLOUNDER, /* 29 */ CHIP_LAST, }; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b6be62356d34..f738c3b53f4e 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -26,8 +26,12 @@ #include <drm/drm.h> #include <linux/ioctl.h> +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 1 +#define KFD_IOCTL_MINOR_VERSION 3 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -442,6 +446,17 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* + * KFD SMI (System Management Interface) events + */ +/* Event type (defined by bitmask) */ +#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 + +struct kfd_ioctl_smi_events_args { + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ +}; + /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -546,7 +561,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_ALLOC_QUEUE_GWS \ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) + #define
AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1F +#define AMDKFD_COMMAND_END 0x20 #endif
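The version comment block and the bumped AMDKFD_COMMAND_END advertise the new ioctl to userspace: a client checks AMDKFD_IOC_GET_VERSION for minor version 3 or later before calling AMDKFD_IOC_SMI_EVENTS, then polls the returned anon_fd for KFD_SMI_EVENT_VMFAULT notifications. A minimal userspace sketch (error handling abbreviated; the gpuid value is a placeholder, real code takes it from the KFD topology):

```c
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	int kfd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
	if (kfd < 0)
		return 1;

	struct kfd_ioctl_get_version_args ver = {0};
	if (ioctl(kfd, AMDKFD_IOC_GET_VERSION, &ver) == 0 &&
	    ver.minor_version >= 3) {
		/* gpuid 0 is a placeholder; query the topology for a real id */
		struct kfd_ioctl_smi_events_args args = { .gpuid = 0 };

		if (ioctl(kfd, AMDKFD_IOC_SMI_EVENTS, &args) == 0) {
			/* args.anon_fd can now be poll()ed for
			 * KFD_SMI_EVENT_VMFAULT notifications */
			printf("SMI event fd: %u\n", args.anon_fd);
			close((int)args.anon_fd);
		}
	}

	close(kfd);
	return 0;
}
```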