author     Dave Airlie   2022-07-27 09:33:44 +1000
committer  Dave Airlie   2022-07-27 09:33:45 +1000
commit     ee8b1ef9a6b089abf7a9c7d094b6e93fa05f15b9 (patch)
tree       04d9ed5e31f325b40e4d8c6af8b9de8e3c6394e5 /drivers/gpu/drm
parent     417c1c1963549e9a48b83ada59d90258e38c6594 (diff)
parent     1b54a0121dba12af268fb75c413feabdb9f573d4 (diff)
Merge tag 'amd-drm-next-5.20-2022-07-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amdgpu:
- VCN4 fixes
- RAS support for UMC 8.10
- ACP support for jadeite platforms
- NBIO HDP flush fixes
- Misc spelling and grammar fixes
- Runtime PM fixes
- Non-DC HPD fix
- Clean up amdgpu DM code
- DSC fixes
- Expose some additional GFXOFF data via debugfs (a hedged read sketch follows this list)
- More FP clean up for new DCN blocks
- PPC DC FP fixes
- DCN 3.1.4 fixes
- DC DML stack usage fixes
- GMC fixes
- SPM fixes for RDNA2
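Illustrative only, not part of the pull: a minimal userspace sketch for the GFXOFF debugfs item above. It assumes a kernel with this series applied, DRM minor 0, and root access for debugfs (all assumptions); the new read handler rejects sizes and offsets that are not multiples of 4, so the status is read as a single u32.

    /* Hedged sketch: read one GFXOFF status word from debugfs.
     * The path and minor number are assumptions; adjust for your system.
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t status;
        int fd = open("/sys/kernel/debug/dri/0/amdgpu_gfxoff_status", O_RDONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* The kernel side copies 4 bytes at a time via put_user(). */
        if (read(fd, &status, sizeof(status)) != sizeof(status)) {
            perror("read");
            close(fd);
            return 1;
        }
        printf("gfxoff status: %u\n", status);
        close(fd);
        return 0;
    }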
amdkfd:
- MMU notifier fix
- Mutex fix
UAPI:
- Add a comment about VCN4 unified queues
- IP version information for UMDs (a hedged query sketch follows below)
Proposed mesa change: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075
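Illustrative only, not part of the pull: the new ip_discovery_version field lands in struct drm_amdgpu_info_hw_ip, so a UMD can fetch it through libdrm's existing amdgpu_query_hw_ip_info() path, much as the proposed mesa change above does. A minimal sketch, assuming updated uapi headers and a render node at /dev/dri/renderD128 (both assumptions); the value packs major/minor/revision the same way as the kernel's IP_VERSION() macro and reads back as 0 on pre-Vega10 parts.

    /* Hedged sketch of a UMD consuming the new ip_discovery_version field. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <amdgpu.h>
    #include <amdgpu_drm.h>

    int main(void)
    {
        amdgpu_device_handle dev;
        uint32_t drv_major, drv_minor;
        struct drm_amdgpu_info_hw_ip info = {0};
        int fd = open("/dev/dri/renderD128", O_RDONLY); /* assumed node */

        if (fd < 0 || amdgpu_device_initialize(fd, &drv_major, &drv_minor, &dev))
            return 1;

        if (!amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &info)) {
            uint32_t v = info.ip_discovery_version; /* 0 on pre-Vega10 */
            printf("GFX IP v%u.%u.%u\n", v >> 16, (v >> 8) & 0xff, v & 0xff);
        }

        amdgpu_device_deinitialize(dev);
        return 0;
    }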
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220726181536.5759-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm')
165 files changed, 9490 insertions, 6901 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 74a8105fd2c0..7777d55275de 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,7 +4,7 @@ config DRM_AMDGPU_SI depends on DRM_AMDGPU help Choose this option if you want to enable experimental support - for SI asics. + for SI (Southern Islands) asics. SI is already supported in radeon. Experimental support for SI in amdgpu will be disabled by default and is still provided by @@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable support for CIK asics. + Choose this option if you want to enable support for CIK (Sea + Islands) asics. CIK is already supported in radeon. Support for CIK in amdgpu will be disabled by default and is still provided by radeon. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index a87e42c2c8dc..c7d0cd15b5ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,7 +93,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2871a3e3801f..b075845a5328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; @@ -1011,7 +1012,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2..bcc7ee02e0fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include <linux/platform_device.h> #include <sound/designware_i2s.h> #include <sound/pcm.h> +#include <linux/acpi.h> +#include <linux/dmi.h> #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. 
turn on acp clock * 3. power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } @@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * @@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data) static int acp_hw_init(void *handle) { int r; - uint64_t acp_base; + u64 acp_base; u32 val = 0; u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; @@ -220,141 +252,210 @@ static int acp_hw_init(void *handle) return -EINVAL; acp_base = adev->rmmio_base; - - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) + if (!adev->acp.acp_genpd) return -ENOMEM; adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd->gpd.power_off = acp_poweroff; adev->acp.acp_genpd->gpd.power_on = acp_poweron; - - adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), - GFP_KERNEL); - - if (adev->acp.acp_cell == NULL) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (adev->acp.acp_res == NULL) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (i2s_pdata == NULL) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + 
adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; - i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - 
i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, - ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; 
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -546,8 +647,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_powergating_state = acp_set_powergating_state, }; -const struct amdgpu_ip_block_version acp_ip_block = -{ +const struct amdgpu_ip_block_version acp_ip_block = { .type = AMD_IP_BLOCK_TYPE_ACP, .major = 2, .minor = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 581c7ae41102..08997092e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) - -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. 
This should be @@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - /* * Conservatively round up the allocation requirement to 2 MB * to avoid fragmentation caused by 4K allocations in the tail @@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -208,28 +192,18 @@ release: static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } @@ -436,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? 
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. 
+ */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 2ef5296216d6..8ee4e8491f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + 
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f3b3c688e4e7..e2eec985adb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); if (r) goto out; - r = put_user(value, (uint32_t *)buf); + r = put_user(value, (u32 *)buf); if (r) goto out; @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1c9587f659b..041bd906449d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5230,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && (job->hw_fence.ops != NULL) && - dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37234c2998d7..242d1847c4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct
amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): @@ -2206,12 +2207,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): @@ -2225,15 +2223,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3d139708160..429fcdf28836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. 
@@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -827,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC @@ -2121,7 +2126,7 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2178,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2461,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2530,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access if device is physically gone */ @@ -2574,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ff659d4f772b..8adeb7469f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,7 +47,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. + * that the relevant GPU caches have been flushed. 
*/ struct amdgpu_fence { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 36c1be77bf8f..5071b96be982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -133,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -156,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -169,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -203,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..babc0af751c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6de63ea6687e..1369c25448dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,17 +43,6 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) -{ - /* - * Add below quirk on several sienna_cichlid cards to disable - * runtime pm to fix EMI failures. 
- */ - if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || - ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) - adev->runpm = false; -} - void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + /* enable BACO as runpm mode if runpm=1 */ if (amdgpu_runtime_pm > 0) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; case CHIP_VEGA10: - /* turn runpm on if noretry=0 */ + /* enable BACO as runpm mode if noretry=0 */ if (!adev->gmc.noretry) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: - /* enable runpm on CI+ */ - adev->runpm = true; + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } - amdgpu_runtime_pm_quirk(adev); - - if (adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9411c28d88b..3ee363bfbac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + 
case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; @@ -2348,6 +2363,13 @@ static int psp_load_smu_fw(struct psp_context *psp) &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras_context.ras; + /* + * Skip SMU FW reloading in case of using BACO for runpm only, + * as SMU is always alive. + */ + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e431f4994931..180634616b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -69,8 +69,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, - PSP_BL__LOAD_INTFDRV = 0xC0000, - PSP_BL__LOAD_DBGDRV = 0xD0000, + PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac..03ac36b2c2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 +237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index c312577df596..939c8614f0e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + 
ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab..ebed3f5226db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3caf6f386042..77f5e998a120 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + 
WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 7c75df5bffed..802e5c753271 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5820c3f0e215..fafbad3cf08d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + 
adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..c6e0f9313a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1772f006c61a..9ae8cdaa033e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -980,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index edbdc0b934ea..1471bfb9ae38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,10 +22,13 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include "umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -537,11 +540,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = 
UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If no special ras_late_init function is defined, use the default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If no special ras_cb function is defined, use the default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } @@ -750,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 34c610b9157d..b465baa26762 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { 
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a..22c775f39119 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 765c3543ad18..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 000000000000..36a2053f2e8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = 
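+ /* SOC15_REG_OFFSET() yields a dword register index while the PCIE accessors take byte addresses, hence the "* 4" scaling used throughout this file. */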
RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. 
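+ * + * Roughly, per the SWIZZLE_MODE_* macros in umc_v8_10.h: the normal address (na) is first folded across the channels, tmp_addr = ((na >> 10) * channel_num + channel_idx) << 10, and the soc physical address is then reassembled from tmp_addr with the two column bits na[9:8] re-inserted at bit position col_bit.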
+ */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. 
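+ * The raw MCA address does not record which [C6 C5] column combination was hit (na_err_addr_base above has those two bits cleared at UMC_V8_10_NA_C5_BIT), so each of the four candidate addresses is translated and retired.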
*/ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 000000000000..849ede88e111 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC register per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 84ac2401895a..a91ffbf902d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" @@ -44,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 + +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= 
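+ /* mask off the upper GMC hole bits so the GPU VA can be matched against the BO mappings below */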
AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H264, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + uint32_t val; + int r = 0; + + /* The first instance can decode anything */ + if (!ring->me) + return r; + + /* unified queue ib header has 8 double words. */ + if (ib->length_dw < 8) + return r; + + val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + + if (decode_buffer->valid_buf_flag & 0x1) + r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo); + } + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fc38a4d81420..6c83a519b3a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1115,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work) struct kfd_process *p = container_of(work, struct kfd_process, release_work); + kfd_process_dequeue_from_all_devices(p); + pqm_uninit(&p->pqm); + + /* Signal the eviction fence after user mode queues are + * destroyed. This allows any BOs to be freed without + * triggering pointless evictions or waiting for fences. 
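+ * + * This block previously ran from kfd_process_notifier_release() under p->mutex; doing it in the deferred release worker instead avoids that locking (see the hunk below that drops it from the notifier path).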
+ */ + dma_fence_signal(p->ef); + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); @@ -1179,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - mutex_lock(&p->mutex); - - kfd_process_dequeue_from_all_devices(p); - pqm_uninit(&p->pqm); - /* Indicate to other users that MM is no longer valid */ p->mm = NULL; - /* Signal the eviction fence after user mode queues are - * destroyed. This allows any BOs to be freed without - * triggering pointless evictions or waiting for fences. - */ - dma_fence_signal(p->ef); - - mutex_unlock(&p->mutex); mmu_notifier_put(&p->mmu_notifier); } @@ -1405,6 +1402,11 @@ static struct kfd_process *create_process(const struct task_struct *thread) hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); + /* Prevent free_notifier from starting kfd_process_wq_release if + * mmu_notifier_get fails because of a pending signal. + */ + kref_get(&process->ref); + /* MMU notifier registration must be the last call that can fail * because after this point we cannot unwind the process creation. * After this point, mmu_notifier_put will trigger the cleanup by @@ -1417,6 +1419,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) } BUG_ON(mn != &process->mmu_notifier); + kfd_unref_process(process); get_task_struct(process->lead_thread); return process; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index b4029c0d5d8c..96cbc87f7b6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 718e123a3230..90fb0f3cdb6f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -25,7 +25,13 @@ -AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o +AMDGPUDM = \ + amdgpu_dm.o \ + amdgpu_dm_plane.o \ + amdgpu_dm_crtc.o \ + amdgpu_dm_irq.o \ + amdgpu_dm_mst_types.o \ + amdgpu_dm_color.o ifdef CONFIG_DRM_AMD_DC_DCN AMDGPUDM += dc_fpu.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e83fed540e8..8660d93cc405 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -46,6 +46,8 @@ #include "amdgpu_ucode.h" #include "atom.h" #include "amdgpu_dm.h" +#include "amdgpu_dm_plane.h" +#include "amdgpu_dm_crtc.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include <drm/display/drm_hdcp_helper.h> @@ -206,13 +208,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm); -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap); -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t link_index); static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *amdgpu_dm_connector, uint32_t link_index, @@ -228,12 +223,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); -static void handle_cursor_update(struct drm_plane *plane, - struct drm_plane_state *old_plane_state); - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); - static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); @@ -347,20 +336,6 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, return NULL; } -static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) -{ - return acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE || - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_FIXED; -} - -static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) -{ - return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || - dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; -} - static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { @@ -476,26 +451,6 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } -static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) -{ - struct drm_crtc *crtc = &acrtc->base; - struct drm_device *dev = crtc->dev; - unsigned long flags; - - drm_crtc_handle_vblank(crtc); - - spin_lock_irqsave(&dev->event_lock, flags); - - /* Send completion event for cursor-only commits */ - if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - drm_crtc_send_vblank_event(crtc, acrtc->event); - drm_crtc_vblank_put(crtc); - acrtc->event = NULL; - } - - 
spin_unlock_irqrestore(&dev->event_lock, flags); -} - static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -1273,52 +1228,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } -static void vblank_control_worker(struct work_struct *work) -{ - struct vblank_control_work *vblank_work = - container_of(work, struct vblank_control_work, work); - struct amdgpu_display_manager *dm = vblank_work->dm; - - mutex_lock(&dm->dc_lock); - - if (vblank_work->enable) - dm->active_vblank_irq_count++; - else if(dm->active_vblank_irq_count) - dm->active_vblank_irq_count--; - - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - - /* - * Control PSR based on vblank requirements from OS - * - * If panel supports PSR SU, there's no need to disable PSR when OS is - * submitting fast atomic commits (we infer this by whether the OS - * requests vblank events). Fast atomic commits will simply trigger a - * full-frame-update (FFU); a specific case of selective-update (SU) - * where the SU region is the full hactive*vactive region. See - * fill_dc_dirty_rects(). - */ - if (vblank_work->stream && vblank_work->stream->link) { - if (vblank_work->enable) { - if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && - vblank_work->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(vblank_work->stream); - } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && - !vblank_work->stream->link->psr_settings.psr_allow_active && - vblank_work->acrtc->dm_irq_params.allow_psr_entry) { - amdgpu_dm_psr_enable(vblank_work->stream); - } - } - - mutex_unlock(&dm->dc_lock); - - dc_stream_release(vblank_work->stream); - - kfree(vblank_work); -} - static void dm_handle_hpd_rx_offload_work(struct work_struct *work) { struct hpd_rx_irq_offload_work *offload_work; @@ -1629,6 +1538,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH) adev->dm.dc->debug.force_subvp_mclk_switch = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; + r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); @@ -1896,6 +1807,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): @@ -2394,9 +2306,6 @@ static int dm_hw_fini(void *handle) } -static int dm_enable_vblank(struct drm_crtc *crtc); -static void dm_disable_vblank(struct drm_crtc *crtc); - static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, struct dc_state *state, bool enable) { @@ -4288,6 +4197,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) for (i = 0; i < dm->dc->caps.max_planes; ++i) { struct dc_plane_cap *plane = &dm->dc->caps.planes[i]; + /* Do not create overlay if MPO disabled */ + if (amdgpu_dc_debug_mask & DC_DISABLE_MPO) + break; + if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL) continue; @@ -4688,13 +4601,6 @@ static int dm_early_init(void *handle) return 0; } -static bool modeset_required(struct drm_crtc_state *crtc_state, - struct dc_stream_state *new_stream, - struct dc_stream_state *old_stream) -{ - return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); -} - 
static bool modereset_required(struct drm_crtc_state *crtc_state) { return !crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); @@ -4710,889 +4616,6 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; - -static void get_min_max_dc_plane_scaling(struct drm_device *dev, - struct drm_framebuffer *fb, - int *min_downscale, int *max_upscale) -{ - struct amdgpu_device *adev = drm_to_adev(dev); - struct dc *dc = adev->dm.dc; - /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ - struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; - - switch (fb->format->format) { - case DRM_FORMAT_P010: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - *max_upscale = plane_cap->max_upscale_factor.nv12; - *min_downscale = plane_cap->max_downscale_factor.nv12; - break; - - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - *max_upscale = plane_cap->max_upscale_factor.fp16; - *min_downscale = plane_cap->max_downscale_factor.fp16; - break; - - default: - *max_upscale = plane_cap->max_upscale_factor.argb8888; - *min_downscale = plane_cap->max_downscale_factor.argb8888; - break; - } - - /* - * A factor of 1 in the plane_cap means to not allow scaling, ie. use a - * scaling factor of 1.0 == 1000 units. - */ - if (*max_upscale == 1) - *max_upscale = 1000; - - if (*min_downscale == 1) - *min_downscale = 1000; -} - - -static int fill_dc_scaling_info(struct amdgpu_device *adev, - const struct drm_plane_state *state, - struct dc_scaling_info *scaling_info) -{ - int scale_w, scale_h, min_downscale, max_upscale; - - memset(scaling_info, 0, sizeof(*scaling_info)); - - /* Source is fixed 16.16 but we ignore mantissa for now... */ - scaling_info->src_rect.x = state->src_x >> 16; - scaling_info->src_rect.y = state->src_y >> 16; - - /* - * For reasons we don't (yet) fully understand a non-zero - * src_y coordinate into an NV12 buffer can cause a - * system hang on DCN1x. - * To avoid hangs (and maybe be overly cautious) - * let's reject both non-zero src_x and src_y. - * - * We currently know of only one use-case to reproduce a - * scenario with non-zero src_x and src_y for NV12, which - * is to gesture the YouTube Android app into full screen - * on ChromeOS. - */ - if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || - (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && - (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && - (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) - return -EINVAL; - - scaling_info->src_rect.width = state->src_w >> 16; - if (scaling_info->src_rect.width == 0) - return -EINVAL; - - scaling_info->src_rect.height = state->src_h >> 16; - if (scaling_info->src_rect.height == 0) - return -EINVAL; - - scaling_info->dst_rect.x = state->crtc_x; - scaling_info->dst_rect.y = state->crtc_y; - - if (state->crtc_w == 0) - return -EINVAL; - - scaling_info->dst_rect.width = state->crtc_w; - - if (state->crtc_h == 0) - return -EINVAL; - - scaling_info->dst_rect.height = state->crtc_h; - - /* DRM doesn't specify clipping on destination output. 
*/ - scaling_info->clip_rect = scaling_info->dst_rect; - - /* Validate scaling per-format with DC plane caps */ - if (state->plane && state->plane->dev && state->fb) { - get_min_max_dc_plane_scaling(state->plane->dev, state->fb, - &min_downscale, &max_upscale); - } else { - min_downscale = 250; - max_upscale = 16000; - } - - scale_w = scaling_info->dst_rect.width * 1000 / - scaling_info->src_rect.width; - - if (scale_w < min_downscale || scale_w > max_upscale) - return -EINVAL; - - scale_h = scaling_info->dst_rect.height * 1000 / - scaling_info->src_rect.height; - - if (scale_h < min_downscale || scale_h > max_upscale) - return -EINVAL; - - /* - * The "scaling_quality" can be ignored for now, quality = 0 has DC - * assume reasonable defaults based on the format. - */ - - return 0; -} - -static void -fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, - uint64_t tiling_flags) -{ - /* Fill GFX8 params */ - if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { - unsigned int bankw, bankh, mtaspect, tile_split, num_banks; - - bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); - bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); - mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); - tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); - num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - - /* XXX fix me for VI */ - tiling_info->gfx8.num_banks = num_banks; - tiling_info->gfx8.array_mode = - DC_ARRAY_2D_TILED_THIN1; - tiling_info->gfx8.tile_split = tile_split; - tiling_info->gfx8.bank_width = bankw; - tiling_info->gfx8.bank_height = bankh; - tiling_info->gfx8.tile_aspect = mtaspect; - tiling_info->gfx8.tile_mode = - DC_ADDR_SURF_MICRO_TILING_DISPLAY; - } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) - == DC_ARRAY_1D_TILED_THIN1) { - tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; - } - - tiling_info->gfx8.pipe_config = - AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); -} - -static void -fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info) -{ - tiling_info->gfx9.num_pipes = - adev->gfx.config.gb_addr_config_fields.num_pipes; - tiling_info->gfx9.num_banks = - adev->gfx.config.gb_addr_config_fields.num_banks; - tiling_info->gfx9.pipe_interleave = - adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; - tiling_info->gfx9.num_shader_engines = - adev->gfx.config.gb_addr_config_fields.num_se; - tiling_info->gfx9.max_compressed_frags = - adev->gfx.config.gb_addr_config_fields.max_compress_frags; - tiling_info->gfx9.num_rb_per_se = - adev->gfx.config.gb_addr_config_fields.num_rb_per_se; - tiling_info->gfx9.shaderEnable = 1; - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; -} - -static int -validate_dcc(struct amdgpu_device *adev, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const union dc_tiling_info *tiling_info, - const struct dc_plane_dcc_param *dcc, - const struct dc_plane_address *address, - const struct plane_size *plane_size) -{ - struct dc *dc = adev->dm.dc; - struct dc_dcc_surface_param input; - struct dc_surface_dcc_cap output; - - memset(&input, 0, sizeof(input)); - memset(&output, 0, sizeof(output)); - - if (!dcc->enable) - return 0; - - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) - return -EINVAL; - - input.format = format; - input.surface_size.width = 
plane_size->surface_size.width; - input.surface_size.height = plane_size->surface_size.height; - input.swizzle_mode = tiling_info->gfx9.swizzle; - - if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) - input.scan = SCAN_DIRECTION_HORIZONTAL; - else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) - input.scan = SCAN_DIRECTION_VERTICAL; - - if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) - return -EINVAL; - - if (!output.capable) - return -EINVAL; - - if (dcc->independent_64b_blks == 0 && - output.grph.rgb.independent_64b_blks != 0) - return -EINVAL; - - return 0; -} - -static bool -modifier_has_dcc(uint64_t modifier) -{ - return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); -} - -static unsigned -modifier_gfx9_swizzle_mode(uint64_t modifier) -{ - if (modifier == DRM_FORMAT_MOD_LINEAR) - return 0; - - return AMD_FMT_MOD_GET(TILE, modifier); -} - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) -{ - return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); -} - -static void -fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info, - uint64_t modifier) -{ - unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); - unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); - unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); - unsigned int pipes_log2; - - pipes_log2 = min(5u, mod_pipe_xor_bits); - - fill_gfx9_tiling_info_from_device(adev, tiling_info); - - if (!IS_AMD_FMT_MOD(modifier)) - return; - - tiling_info->gfx9.num_pipes = 1u << pipes_log2; - tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); - - if (adev->family >= AMDGPU_FAMILY_NV) { - tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; - } else { - tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; - - /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */ - } -} - -enum dm_micro_swizzle { - MICRO_SWIZZLE_Z = 0, - MICRO_SWIZZLE_S = 1, - MICRO_SWIZZLE_D = 2, - MICRO_SWIZZLE_R = 3 -}; - -static bool dm_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - const struct drm_format_info *info = drm_format_info(format); - int i; - - enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; - - if (!info) - return false; - - /* - * We always have to allow these modifiers: - * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. - * 2. Not passing any modifiers is the same as explicitly passing INVALID. - */ - if (modifier == DRM_FORMAT_MOD_LINEAR || - modifier == DRM_FORMAT_MOD_INVALID) { - return true; - } - - /* Check that the modifier is on the list of the plane's supported modifiers. */ - for (i = 0; i < plane->modifier_count; i++) { - if (modifier == plane->modifiers[i]) - break; - } - if (i == plane->modifier_count) - return false; - - /* - * For D swizzle the canonical modifier depends on the bpp, so check - * it here. - */ - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && - adev->family >= AMDGPU_FAMILY_NV) { - if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4) - return false; - } - - if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D && - info->cpp[0] < 8) - return false; - - if (modifier_has_dcc(modifier)) { - /* Per radeonsi comments 16/64 bpp are more complicated. 
*/ - if (info->cpp[0] != 4) - return false; - /* We support multi-planar formats, but not when combined with - * additional DCC metadata planes. */ - if (info->num_planes > 1) - return false; - } - - return true; -} - -static void -add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod) -{ - if (!*mods) - return; - - if (*cap - *size < 1) { - uint64_t new_cap = *cap * 2; - uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); - - if (!new_mods) { - kfree(*mods); - *mods = NULL; - return; - } - - memcpy(new_mods, *mods, sizeof(uint64_t) * *size); - kfree(*mods); - *mods = new_mods; - *cap = new_cap; - } - - (*mods)[*size] = mod; - *size += 1; -} - -static void -add_gfx9_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pipe_xor_bits = min(8, pipes + - ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); - int bank_xor_bits = min(8 - pipe_xor_bits, - ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); - int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + - ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); - - - if (adev->family == AMDGPU_FAMILY_RV) { - /* Raven2 and later */ - bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; - - /* - * No _D DCC swizzles yet because we only allow 32bpp, which - * doesn't support _D on DCN - */ - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(RB, rb) | - AMD_FMT_MOD_SET(PIPE, pipes)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | - AMD_FMT_MOD_SET(RB, rb) | - 
AMD_FMT_MOD_SET(PIPE, pipes)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. - */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. - */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - } -} - -static void -add_gfx10_1_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx10_3_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, 
AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx11_modifiers(struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int num_pipes = 0; - int pipe_xor_bits = 0; - int num_pkrs = 0; - int pkrs = 0; - u32 gb_addr_config; - u8 i = 0; - unsigned swizzle_r_x; - uint64_t modifier_r_x; - uint64_t modifier_dcc_best; - uint64_t modifier_dcc_4k; - - /* TODO: GFX11 IP HW init hasnt finish and we get zero if we read from - * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} */ - gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); - ASSERT(gb_addr_config != 0); - - num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); - pkrs = ilog2(num_pkrs); - num_pipes = 1 << 
REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); - pipe_xor_bits = ilog2(num_pipes); - - for (i = 0; i < 2; i++) { - /* Insert the best one first. */ - /* R_X swizzle modes are the best for rendering and DCC requires them. */ - if (num_pipes > 16) - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; - else - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; - - modifier_r_x = AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(TILE, swizzle_r_x) | - AMD_FMT_MOD_SET(PACKERS, pkrs); - - /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ - modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); - - /* DCC settings for 4K and greater resolutions. (required by display hw) */ - modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); - - add_modifier(mods, size, capacity, modifier_dcc_best); - add_modifier(mods, size, capacity, modifier_dcc_4k); - - add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - - add_modifier(mods, size, capacity, modifier_r_x); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); -} - -static int -get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) -{ - uint64_t size = 0, capacity = 128; - *mods = NULL; - - /* We have not hooked up any pre-GFX9 modifiers. */ - if (adev->family < AMDGPU_FAMILY_AI) - return 0; - - *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); - - if (plane_type == DRM_PLANE_TYPE_CURSOR) { - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - return *mods ? 0 : -ENOMEM; - } - - switch (adev->family) { - case AMDGPU_FAMILY_AI: - case AMDGPU_FAMILY_RV: - add_gfx9_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_NV: - case AMDGPU_FAMILY_VGH: - case AMDGPU_FAMILY_YC: - case AMDGPU_FAMILY_GC_10_3_6: - case AMDGPU_FAMILY_GC_10_3_7: - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - add_gfx10_3_modifiers(adev, mods, &size, &capacity); - else - add_gfx10_1_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: - add_gfx11_modifiers(adev, mods, &size, &capacity); - break; - } - - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - - /* INVALID marks the end of the list. 
*/ - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - - if (!*mods) - return -ENOMEM; - - return 0; -} - -static int -fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) -{ - const uint64_t modifier = afb->base.modifier; - int ret = 0; - - fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); - tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); - - if (modifier_has_dcc(modifier) && !force_disable_dcc) { - uint64_t dcc_address = afb->address + afb->base.offsets[1]; - bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); - bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); - - dcc->enable = 1; - dcc->meta_pitch = afb->base.pitches[1]; - dcc->independent_64b_blks = independent_64b_blks; - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { - if (independent_64b_blks && independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl; - else if (independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_128b; - else if (independent_64b_blks && !independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } else { - if (independent_64b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } - - address->grph.meta_addr.low_part = lower_32_bits(dcc_address); - address->grph.meta_addr.high_part = upper_32_bits(dcc_address); - } - - ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); - if (ret) - drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); - - return ret; -} - -static int -fill_plane_buffer_attributes(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, - struct plane_size *plane_size, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) -{ - const struct drm_framebuffer *fb = &afb->base; - int ret; - - memset(tiling_info, 0, sizeof(*tiling_info)); - memset(plane_size, 0, sizeof(*plane_size)); - memset(dcc, 0, sizeof(*dcc)); - memset(address, 0, sizeof(*address)); - - address->tmz_surface = tmz_surface; - - if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - uint64_t addr = afb->address + fb->offsets[0]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / fb->format->cpp[0]; - - address->type = PLN_ADDR_TYPE_GRAPHICS; - address->grph.addr.low_part = lower_32_bits(addr); - address->grph.addr.high_part = upper_32_bits(addr); - } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { - uint64_t luma_addr = afb->address + fb->offsets[0]; - uint64_t chroma_addr = afb->address + fb->offsets[1]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / 
fb->format->cpp[0]; - - plane_size->chroma_size.x = 0; - plane_size->chroma_size.y = 0; - /* TODO: set these based on surface format */ - plane_size->chroma_size.width = fb->width / 2; - plane_size->chroma_size.height = fb->height / 2; - - plane_size->chroma_pitch = - fb->pitches[1] / fb->format->cpp[1]; - - address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; - address->video_progressive.luma_addr.low_part = - lower_32_bits(luma_addr); - address->video_progressive.luma_addr.high_part = - upper_32_bits(luma_addr); - address->video_progressive.chroma_addr.low_part = - lower_32_bits(chroma_addr); - address->video_progressive.chroma_addr.high_part = - upper_32_bits(chroma_addr); - } - - if (adev->family >= AMDGPU_FAMILY_AI) { - ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, - rotation, plane_size, - tiling_info, dcc, - address, - force_disable_dcc); - if (ret) - return ret; - } else { - fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); - } - - return 0; -} - -static void -fill_blending_from_plane_state(const struct drm_plane_state *plane_state, - bool *per_pixel_alpha, bool *pre_multiplied_alpha, - bool *global_alpha, int *global_alpha_value) -{ - *per_pixel_alpha = false; - *pre_multiplied_alpha = true; - *global_alpha = false; - *global_alpha_value = 0xff; - - if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) - return; - - if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || - plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { - static const uint32_t alpha_formats[] = { - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_ABGR8888, - }; - uint32_t format = plane_state->fb->format->format; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) { - if (format == alpha_formats[i]) { - *per_pixel_alpha = true; - break; - } - } - - if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) - *pre_multiplied_alpha = false; - } - - if (plane_state->alpha < 0xffff) { - *global_alpha = true; - *global_alpha_value = plane_state->alpha >> 8; - } -} - static int fill_plane_color_attributes(const struct drm_plane_state *plane_state, const enum surface_pixel_format format, @@ -5727,6 +4750,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, break; } + plane_info->visible = true; plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; @@ -5741,8 +4765,8 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->rotation, tiling_flags, &plane_info->tiling_info, &plane_info->plane_size, - &plane_info->dcc, address, tmz_surface, - force_disable_dcc); + &plane_info->dcc, address, + tmz_surface, force_disable_dcc); if (ret) return ret; @@ -6168,7 +5192,7 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; - if(old_stream) { + if (old_stream) { timing_out->vic = old_stream->timing.vic; timing_out->flags.HSYNC_POSITIVE_POLARITY = old_stream->timing.flags.HSYNC_POSITIVE_POLARITY; timing_out->flags.VSYNC_POSITIVE_POLARITY = old_stream->timing.flags.VSYNC_POSITIVE_POLARITY; @@ -6390,16 +5414,126 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context) } } +/** + * DOC: FreeSync Video + * + * When a userspace application wants to play a video, the content follows a + * standard format definition that usually specifies the FPS for that format. 
+ * The list below illustrates some video formats and their expected FPS: + * + * - TV/NTSC (23.976 FPS) + * - Cinema (24 FPS) + * - TV/PAL (25 FPS) + * - TV/NTSC (29.97 FPS) + * - TV/NTSC (30 FPS) + * - Cinema HFR (48 FPS) + * - TV/PAL (50 FPS) + * - Commonly used (60 FPS) + * - Multiples of 24 (48,72,96 FPS) + * + * The list of standard video formats is not huge and can be added to the + * connector modeset list beforehand. With that, userspace can leverage + * FreeSync to extend the front porch in order to attain the target refresh + * rate. Such a switch will happen seamlessly, without screen blanking or + * reprogramming of the output in any other way. If userspace requests a + * modesetting change compatible with FreeSync modes that only differ in the + * refresh rate, DC will skip the full update and avoid blinking during the + * transition. For example, the video player can change the modesetting from + * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without + * causing any display blink. This same concept can be applied to a mode + * setting change. + */ +static struct drm_display_mode * +get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, + bool use_probed_modes) +{ + struct drm_display_mode *m, *m_pref = NULL; + u16 current_refresh, highest_refresh; + struct list_head *list_head = use_probed_modes ? + &aconnector->base.probed_modes : + &aconnector->base.modes; + + if (aconnector->freesync_vid_base.clock != 0) + return &aconnector->freesync_vid_base; + + /* Find the preferred mode */ + list_for_each_entry (m, list_head, head) { + if (m->type & DRM_MODE_TYPE_PREFERRED) { + m_pref = m; + break; + } + } + + if (!m_pref) { + /* Probably an EDID with no preferred mode. Fall back to the first entry */ + m_pref = list_first_entry_or_null( + &aconnector->base.modes, struct drm_display_mode, head); + if (!m_pref) { + DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); + return NULL; + } + } + + highest_refresh = drm_mode_vrefresh(m_pref); + + /* + * Find the mode with the highest refresh rate at the same resolution. + * For some monitors, the preferred mode is not the one with the highest + * supported refresh rate.
+ */ + list_for_each_entry (m, list_head, head) { + current_refresh = drm_mode_vrefresh(m); + + if (m->hdisplay == m_pref->hdisplay && + m->vdisplay == m_pref->vdisplay && + highest_refresh < current_refresh) { + highest_refresh = current_refresh; + m_pref = m; + } + } + + drm_mode_copy(&aconnector->freesync_vid_base, m_pref); + return m_pref; +} + +static bool is_freesync_video_mode(const struct drm_display_mode *mode, + struct amdgpu_dm_connector *aconnector) +{ + struct drm_display_mode *high_mode; + int timing_diff; + + high_mode = get_highest_refresh_rate_mode(aconnector, false); + if (!high_mode || !mode) + return false; + + timing_diff = high_mode->vtotal - mode->vtotal; + + if (high_mode->clock == 0 || high_mode->clock != mode->clock || + high_mode->hdisplay != mode->hdisplay || + high_mode->vdisplay != mode->vdisplay || + high_mode->hsync_start != mode->hsync_start || + high_mode->hsync_end != mode->hsync_end || + high_mode->htotal != mode->htotal || + high_mode->hskew != mode->hskew || + high_mode->vscan != mode->vscan || + high_mode->vsync_start - mode->vsync_start != timing_diff || + high_mode->vsync_end - mode->vsync_end != timing_diff) + return false; + else + return true; +} + #if defined(CONFIG_DRM_AMD_DC_DCN) static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { stream->timing.flags.DSC = 0; dsc_caps->is_dsc_supported = false; if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || - sink->sink_signal == SIGNAL_TYPE_EDP)) { + sink->sink_signal == SIGNAL_TYPE_EDP)) { if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE || sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, @@ -6409,6 +5543,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, } } + static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, struct dc_sink *sink, struct dc_stream_state *stream, struct dsc_dec_dpcd_caps *dsc_caps, @@ -6467,9 +5602,10 @@ static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, } } + static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; @@ -6480,7 +5616,6 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); - if (stream->link && stream->link->local_sink) max_dsc_target_bpp_limit_override = stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit; @@ -6504,8 +5639,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, &stream->timing, &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", - __func__, drm_connector->name); + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) { timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); @@ -6544,116 
+5678,6 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, } #endif /* CONFIG_DRM_AMD_DC_DCN */ -/** - * DOC: FreeSync Video - * - * When a userspace application wants to play a video, the content follows a - * standard format definition that usually specifies the FPS for that format. - * The below list illustrates some video format and the expected FPS, - * respectively: - * - * - TV/NTSC (23.976 FPS) - * - Cinema (24 FPS) - * - TV/PAL (25 FPS) - * - TV/NTSC (29.97 FPS) - * - TV/NTSC (30 FPS) - * - Cinema HFR (48 FPS) - * - TV/PAL (50 FPS) - * - Commonly used (60 FPS) - * - Multiples of 24 (48,72,96,120 FPS) - * - * The list of standards video format is not huge and can be added to the - * connector modeset list beforehand. With that, userspace can leverage - * FreeSync to extends the front porch in order to attain the target refresh - * rate. Such a switch will happen seamlessly, without screen blanking or - * reprogramming of the output in any other way. If the userspace requests a - * modesetting change compatible with FreeSync modes that only differ in the - * refresh rate, DC will skip the full update and avoid blink during the - * transition. For example, the video player can change the modesetting from - * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without - * causing any display blink. This same concept can be applied to a mode - * setting change. - */ -static struct drm_display_mode * -get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, - bool use_probed_modes) -{ - struct drm_display_mode *m, *m_pref = NULL; - u16 current_refresh, highest_refresh; - struct list_head *list_head = use_probed_modes ? - &aconnector->base.probed_modes : - &aconnector->base.modes; - - if (aconnector->freesync_vid_base.clock != 0) - return &aconnector->freesync_vid_base; - - /* Find the preferred mode */ - list_for_each_entry (m, list_head, head) { - if (m->type & DRM_MODE_TYPE_PREFERRED) { - m_pref = m; - break; - } - } - - if (!m_pref) { - /* Probably an EDID with no preferred mode. Fallback to first entry */ - m_pref = list_first_entry_or_null( - &aconnector->base.modes, struct drm_display_mode, head); - if (!m_pref) { - DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); - return NULL; - } - } - - highest_refresh = drm_mode_vrefresh(m_pref); - - /* - * Find the mode with highest refresh rate with same resolution. - * For some monitors, preferred mode is not the mode with highest - * supported refresh rate. 
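As a rough worked example of the front-porch stretch described in the FreeSync Video doc above (the timing numbers are hypothetical):

	/* Hypothetical 1080p@60 base timing; the pixel clock is left untouched. */
	unsigned int base_vtotal = 1125;           /* total lines per frame at 60 Hz */
	unsigned int base_hz = 60, target_hz = 30;

	/* Only the vertical front porch grows; every other timing field stays
	 * equal, which is exactly what is_freesync_video_mode() verifies. */
	unsigned int new_vtotal = base_vtotal * base_hz / target_hz;  /* 2250 */
	unsigned int extra_front_porch = new_vtotal - base_vtotal;    /* 1125 */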
- */ - list_for_each_entry (m, list_head, head) { - current_refresh = drm_mode_vrefresh(m); - - if (m->hdisplay == m_pref->hdisplay && - m->vdisplay == m_pref->vdisplay && - highest_refresh < current_refresh) { - highest_refresh = current_refresh; - m_pref = m; - } - } - - drm_mode_copy(&aconnector->freesync_vid_base, m_pref); - return m_pref; -} - -static bool is_freesync_video_mode(const struct drm_display_mode *mode, - struct amdgpu_dm_connector *aconnector) -{ - struct drm_display_mode *high_mode; - int timing_diff; - - high_mode = get_highest_refresh_rate_mode(aconnector, false); - if (!high_mode || !mode) - return false; - - timing_diff = high_mode->vtotal - mode->vtotal; - - if (high_mode->clock == 0 || high_mode->clock != mode->clock || - high_mode->hdisplay != mode->hdisplay || - high_mode->vdisplay != mode->vdisplay || - high_mode->hsync_start != mode->hsync_start || - high_mode->hsync_end != mode->hsync_end || - high_mode->htotal != mode->htotal || - high_mode->hskew != mode->hskew || - high_mode->vscan != mode->vscan || - high_mode->vsync_start - mode->vsync_start != timing_diff || - high_mode->vsync_end - mode->vsync_end != timing_diff) - return false; - else - return true; -} - static struct dc_stream_state * create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct drm_display_mode *drm_mode, @@ -6677,6 +5701,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif + struct dc_sink *sink = NULL; memset(&saved_mode, 0, sizeof(saved_mode)); @@ -6740,7 +5765,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_mode_copy(&mode, freesync_mode); } else { decide_crtc_timing_for_drm_display_mode( - &mode, preferred_mode, scale); + &mode, preferred_mode, scale); preferred_refresh = drm_mode_vrefresh(preferred_mode); } @@ -6751,7 +5776,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, else if (!dm_state) drm_mode_set_crtcinfo(&mode, 0); - /* + /* * If scaling is enabled and refresh rate didn't change * we copy the vic and polarities of the old timings */ @@ -6806,182 +5831,6 @@ finish: return stream; } -static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) -{ - drm_crtc_cleanup(crtc); - kfree(crtc); -} - -static void dm_crtc_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - struct dm_crtc_state *cur = to_dm_crtc_state(state); - - /* TODO Destroy dc_stream objects are stream object is flattened */ - if (cur->stream) - dc_stream_release(cur->stream); - - - __drm_atomic_helper_crtc_destroy_state(state); - - - kfree(state); -} - -static void dm_crtc_reset_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state; - - if (crtc->state) - dm_crtc_destroy_state(crtc, crtc->state); - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (WARN_ON(!state)) - return; - - __drm_atomic_helper_crtc_reset(crtc, &state->base); -} - -static struct drm_crtc_state * -dm_crtc_duplicate_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state, *cur; - - cur = to_dm_crtc_state(crtc->state); - - if (WARN_ON(!crtc->state)) - return NULL; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return NULL; - - __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); - - if (cur->stream) { - state->stream = cur->stream; - dc_stream_retain(state->stream); - } - - state->active_planes = cur->active_planes; - state->vrr_infopacket = cur->vrr_infopacket; - state->abm_level = cur->abm_level; - state->vrr_supported = 
cur->vrr_supported; - state->freesync_config = cur->freesync_config; - state->cm_has_degamma = cur->cm_has_degamma; - state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; - state->mpo_requested = cur->mpo_requested; - /* TODO Duplicate dc_stream after objects are stream object is flattened */ - - return &state->base; -} - -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) -{ - crtc_debugfs_init(crtc); - - return 0; -} -#endif - -static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - int rc; - - irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; - - rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - - DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", - acrtc->crtc_id, enable ? "en" : "dis", rc); - return rc; -} - -static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); - struct amdgpu_display_manager *dm = &adev->dm; - struct vblank_control_work *work; - int rc = 0; - - if (enable) { - /* vblank irq on -> Only need vupdate irq in vrr mode */ - if (amdgpu_dm_vrr_active(acrtc_state)) - rc = dm_set_vupdate_irq(crtc, true); - } else { - /* vblank irq off -> vupdate irq off */ - rc = dm_set_vupdate_irq(crtc, false); - } - - if (rc) - return rc; - - irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; - - if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - return -EBUSY; - - if (amdgpu_in_reset(adev)) - return 0; - - if (dm->vblank_control_workqueue) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - - INIT_WORK(&work->work, vblank_control_worker); - work->dm = dm; - work->acrtc = acrtc; - work->enable = enable; - - if (acrtc_state->stream) { - dc_stream_retain(acrtc_state->stream); - work->stream = acrtc_state->stream; - } - - queue_work(dm->vblank_control_workqueue, &work->work); - } - - return 0; -} - -static int dm_enable_vblank(struct drm_crtc *crtc) -{ - return dm_set_vblank(crtc, true); -} - -static void dm_disable_vblank(struct drm_crtc *crtc) -{ - dm_set_vblank(crtc, false); -} - -/* Implemented only the options currently available for the driver */ -static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - .reset = dm_crtc_reset_state, - .destroy = amdgpu_dm_crtc_destroy, - .set_config = drm_atomic_helper_set_config, - .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = dm_crtc_duplicate_state, - .atomic_destroy_state = dm_crtc_destroy_state, - .set_crc_source = amdgpu_dm_crtc_set_crc_source, - .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, - .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = dm_enable_vblank, - .disable_vblank = dm_disable_vblank, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, -#endif -}; - static enum drm_connector_status amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) { @@ -6999,7 +5848,8 @@ amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) !aconnector->fake_enable) connected = (aconnector->dc_sink != NULL); else - connected = (aconnector->base.force 
== DRM_FORCE_ON); + connected = (aconnector->base.force == DRM_FORCE_ON || + aconnector->base.force == DRM_FORCE_ON_DIGITAL); update_subconnector_property(aconnector); @@ -7123,18 +5973,21 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) int i; /* - * Call only if mst_mgr was iniitalized before since it's not done + * Call only if mst_mgr was initialized before since it's not done * for all connector types. */ if (aconnector->mst_mgr.dev) drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr); +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ + defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) for (i = 0; i < dm->num_of_edps; i++) { if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) { backlight_device_unregister(dm->backlight_dev[i]); dm->backlight_dev[i] = NULL; } } +#endif if (aconnector->dc_em_sink) dc_sink_release(aconnector->dc_em_sink); @@ -7175,6 +6028,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->base.max_requested_bpc = 8; state->vcpi_slots = 0; state->pbn = 0; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) state->abm_level = amdgpu_dm_abm_level; @@ -7512,113 +6366,6 @@ amdgpu_dm_connector_helper_funcs = { .atomic_check = amdgpu_dm_connector_atomic_check, }; -static void dm_crtc_helper_disable(struct drm_crtc *crtc) -{ -} - -static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) -{ - struct drm_atomic_state *state = new_crtc_state->state; - struct drm_plane *plane; - int num_active = 0; - - drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { - struct drm_plane_state *new_plane_state; - - /* Cursor planes are "fake". */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; - - new_plane_state = drm_atomic_get_new_plane_state(state, plane); - - if (!new_plane_state) { - /* - * The plane is enable on the CRTC and hasn't changed - * state. This means that it previously passed - * validation and is therefore enabled. - */ - num_active += 1; - continue; - } - - /* We need a framebuffer to be considered enabled. */ - num_active += (new_plane_state->fb != NULL); - } - - return num_active; -} - -static void dm_update_crtc_active_planes(struct drm_crtc *crtc, - struct drm_crtc_state *new_crtc_state) -{ - struct dm_crtc_state *dm_new_crtc_state = - to_dm_crtc_state(new_crtc_state); - - dm_new_crtc_state->active_planes = 0; - - if (!dm_new_crtc_state->stream) - return; - - dm_new_crtc_state->active_planes = - count_crtc_active_planes(new_crtc_state); -} - -static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, - crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dc *dc = adev->dm.dc; - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - int ret = -EINVAL; - - trace_amdgpu_dm_crtc_atomic_check(crtc_state); - - dm_update_crtc_active_planes(crtc, crtc_state); - - if (WARN_ON(unlikely(!dm_crtc_state->stream && - modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { - return ret; - } - - /* - * We require the primary plane to be enabled whenever the CRTC is, otherwise - * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other - * planes are disabled, which is not supported by the hardware. And there is legacy - * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. 
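For reference on the detect() change earlier in this hunk: a forced connector is now reported as connected for both DRM_FORCE_ON and DRM_FORCE_ON_DIGITAL. These force states typically come from the video= kernel parameter, where 'e' forces the output on, 'd' forces it off, and 'D' forces it on as digital, e.g. video=DVI-I-1:D.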
- */ - if (crtc_state->enable && - !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { - DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); - return -EINVAL; - } - - /* In some use cases, like reset, no stream is attached */ - if (!dm_crtc_state->stream) - return 0; - - if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) - return 0; - - DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); - return ret; -} - -static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { - .disable = dm_crtc_helper_disable, - .atomic_check = dm_crtc_helper_atomic_check, - .mode_fixup = dm_crtc_helper_mode_fixup, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - static void dm_encoder_helper_disable(struct drm_encoder *encoder) { @@ -7627,21 +6374,21 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder) int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth) { switch (display_color_depth) { - case COLOR_DEPTH_666: - return 6; - case COLOR_DEPTH_888: - return 8; - case COLOR_DEPTH_101010: - return 10; - case COLOR_DEPTH_121212: - return 12; - case COLOR_DEPTH_141414: - return 14; - case COLOR_DEPTH_161616: - return 16; - default: - break; - } + case COLOR_DEPTH_666: + return 6; + case COLOR_DEPTH_888: + return 8; + case COLOR_DEPTH_101010: + return 10; + case COLOR_DEPTH_121212: + return 12; + case COLOR_DEPTH_141414: + return 14; + case COLOR_DEPTH_161616: + return 16; + default: + break; + } return 0; } @@ -7672,7 +6419,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) && - aconnector->force_yuv420_output; + aconnector->force_yuv420_output; color_depth = convert_color_depth_from_display_info(connector, is_y420, max_bpc); @@ -7727,7 +6474,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, if (!stream) continue; - if ((struct amdgpu_dm_connector*)stream->dm_stream_context == aconnector) + if ((struct amdgpu_dm_connector *)stream->dm_stream_context == aconnector) break; stream = NULL; @@ -7776,532 +6523,6 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } #endif -static void dm_drm_plane_reset(struct drm_plane *plane) -{ - struct dm_plane_state *amdgpu_state = NULL; - - if (plane->state) - plane->funcs->atomic_destroy_state(plane, plane->state); - - amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); - WARN_ON(amdgpu_state == NULL); - - if (amdgpu_state) - __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); -} - -static struct drm_plane_state * -dm_drm_plane_duplicate_state(struct drm_plane *plane) -{ - struct dm_plane_state *dm_plane_state, *old_dm_plane_state; - - old_dm_plane_state = to_dm_plane_state(plane->state); - dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL); - if (!dm_plane_state) - return NULL; - - __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base); - - if (old_dm_plane_state->dc_state) { - dm_plane_state->dc_state = old_dm_plane_state->dc_state; - dc_plane_state_retain(dm_plane_state->dc_state); - } - - return &dm_plane_state->base; -} - -static void dm_drm_plane_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct 
dm_plane_state *dm_plane_state = to_dm_plane_state(state); - - if (dm_plane_state->dc_state) - dc_plane_state_release(dm_plane_state->dc_state); - - drm_atomic_helper_plane_destroy_state(plane, state); -} - -static const struct drm_plane_funcs dm_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = drm_primary_helper_destroy, - .reset = dm_drm_plane_reset, - .atomic_duplicate_state = dm_drm_plane_duplicate_state, - .atomic_destroy_state = dm_drm_plane_destroy_state, - .format_mod_supported = dm_plane_format_mod_supported, -}; - -static int dm_plane_helper_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - struct amdgpu_framebuffer *afb; - struct drm_gem_object *obj; - struct amdgpu_device *adev; - struct amdgpu_bo *rbo; - struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; - uint32_t domain; - int r; - - if (!new_state->fb) { - DRM_DEBUG_KMS("No FB bound\n"); - return 0; - } - - afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; - rbo = gem_to_amdgpu_bo(obj); - adev = amdgpu_ttm_adev(rbo->tbo.bdev); - - r = amdgpu_bo_reserve(rbo, true); - if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); - return r; - } - - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); - if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); - goto error_unlock; - } - - if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_supported_domains(adev, rbo->flags); - else - domain = AMDGPU_GEM_DOMAIN_VRAM; - - r = amdgpu_bo_pin(rbo, domain); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("Failed to pin framebuffer with error %d\n", r); - goto error_unlock; - } - - r = amdgpu_ttm_alloc_gart(&rbo->tbo); - if (unlikely(r != 0)) { - DRM_ERROR("%p bind failed\n", rbo); - goto error_unpin; - } - - r = drm_gem_plane_helper_prepare_fb(plane, new_state); - if (unlikely(r != 0)) - goto error_unpin; - - amdgpu_bo_unreserve(rbo); - - afb->address = amdgpu_bo_gpu_offset(rbo); - - amdgpu_bo_ref(rbo); - - /** - * We don't do surface updates on planes that have been newly created, - * but we also don't have the afb->address during atomic check. - * - * Fill in buffer attributes depending on the address here, but only on - * newly created planes since they're not being used by DC yet and this - * won't modify global state. 
- */ - dm_plane_state_old = to_dm_plane_state(plane->state); - dm_plane_state_new = to_dm_plane_state(new_state); - - if (dm_plane_state_new->dc_state && - dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { - struct dc_plane_state *plane_state = - dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; - - fill_plane_buffer_attributes( - adev, afb, plane_state->format, plane_state->rotation, - afb->tiling_flags, - &plane_state->tiling_info, &plane_state->plane_size, - &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); - } - - return 0; - -error_unpin: - amdgpu_bo_unpin(rbo); - -error_unlock: - amdgpu_bo_unreserve(rbo); - return r; -} - -static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct amdgpu_bo *rbo; - int r; - - if (!old_state->fb) - return; - - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); - r = amdgpu_bo_reserve(rbo, false); - if (unlikely(r)) { - DRM_ERROR("failed to reserve rbo before unpin\n"); - return; - } - - amdgpu_bo_unpin(rbo); - amdgpu_bo_unreserve(rbo); - amdgpu_bo_unref(&rbo); -} - -static int dm_plane_helper_check_state(struct drm_plane_state *state, - struct drm_crtc_state *new_crtc_state) -{ - struct drm_framebuffer *fb = state->fb; - int min_downscale, max_upscale; - int min_scale = 0; - int max_scale = INT_MAX; - - /* Plane enabled? Validate viewport and get scaling factors from plane caps. */ - if (fb && state->crtc) { - /* Validate viewport to cover the case when only the position changes */ - if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { - int viewport_width = state->crtc_w; - int viewport_height = state->crtc_h; - - if (state->crtc_x < 0) - viewport_width += state->crtc_x; - else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) - viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; - - if (state->crtc_y < 0) - viewport_height += state->crtc_y; - else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) - viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; - - if (viewport_width < 0 || viewport_height < 0) { - DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); - return -EINVAL; - } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. */ - DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); - return -EINVAL; - } else if (viewport_height < MIN_VIEWPORT_SIZE) { - DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); - return -EINVAL; - } - - } - - /* Get min/max allowed scaling factors from plane caps. */ - get_min_max_dc_plane_scaling(state->crtc->dev, fb, - &min_downscale, &max_upscale); - /* - * Convert to drm convention: 16.16 fixed point, instead of dc's - * 1.0 == 1000. Also drm scaling is src/dst instead of dc's - * dst/src, so min_scale = 1.0 / max_upscale, etc. 
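A quick sanity check of that conversion with hypothetical plane caps:

	/* dc units: 1.0 == 1000. Assume the plane can downscale to 0.25x and
	 * upscale to 4.0x. */
	int min_downscale = 250;
	int max_upscale = 4000;

	/* drm wants 16.16 fixed point and a src/dst ratio, so invert: */
	int min_scale = (1000 << 16) / max_upscale;   /* 16384  == 0.25 in 16.16 */
	int max_scale = (1000 << 16) / min_downscale; /* 262144 == 4.0  in 16.16 */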
- */ - min_scale = (1000 << 16) / max_upscale; - max_scale = (1000 << 16) / min_downscale; - } - - return drm_atomic_helper_check_plane_state( - state, new_crtc_state, min_scale, max_scale, true, true); -} - -static int dm_plane_atomic_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, - plane); - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct dc *dc = adev->dm.dc; - struct dm_plane_state *dm_plane_state; - struct dc_scaling_info scaling_info; - struct drm_crtc_state *new_crtc_state; - int ret; - - trace_amdgpu_dm_plane_atomic_check(new_plane_state); - - dm_plane_state = to_dm_plane_state(new_plane_state); - - if (!dm_plane_state->dc_state) - return 0; - - new_crtc_state = - drm_atomic_get_new_crtc_state(state, - new_plane_state->crtc); - if (!new_crtc_state) - return -EINVAL; - - ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); - if (ret) - return ret; - - ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); - if (ret) - return ret; - - if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) - return 0; - - return -EINVAL; -} - -static int dm_plane_atomic_async_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - /* Only support async updates on cursor planes. */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) - return -EINVAL; - - return 0; -} - -static void dm_plane_atomic_async_update(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); - struct drm_plane_state *old_state = - drm_atomic_get_old_plane_state(state, plane); - - trace_amdgpu_dm_atomic_update_cursor(new_state); - - swap(plane->state->fb, new_state->fb); - - plane->state->src_x = new_state->src_x; - plane->state->src_y = new_state->src_y; - plane->state->src_w = new_state->src_w; - plane->state->src_h = new_state->src_h; - plane->state->crtc_x = new_state->crtc_x; - plane->state->crtc_y = new_state->crtc_y; - plane->state->crtc_w = new_state->crtc_w; - plane->state->crtc_h = new_state->crtc_h; - - handle_cursor_update(plane, old_state); -} - -static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { - .prepare_fb = dm_plane_helper_prepare_fb, - .cleanup_fb = dm_plane_helper_cleanup_fb, - .atomic_check = dm_plane_atomic_check, - .atomic_async_check = dm_plane_atomic_async_check, - .atomic_async_update = dm_plane_atomic_async_update -}; - -/* - * TODO: these are currently initialized to rgb formats only. 
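One note on the async hooks above, since the policy is easy to miss:

	/*
	 * dm_plane_atomic_async_check() rejects every plane except the cursor,
	 * so only cursor updates take the async (not vblank-synchronized) path;
	 * legacy cursor ioctls routed through the universal cursor plane are
	 * typically satisfied this way.
	 */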
- * For future use cases we should either initialize them dynamically based on - * plane capabilities, or initialize this array to all formats, so internal drm - * check will succeed, and let DC implement proper check - */ -static const uint32_t rgb_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XRGB2101010, - DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ARGB2101010, - DRM_FORMAT_ABGR2101010, - DRM_FORMAT_XRGB16161616, - DRM_FORMAT_XBGR16161616, - DRM_FORMAT_ARGB16161616, - DRM_FORMAT_ABGR16161616, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565, -}; - -static const uint32_t overlay_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565 -}; - -static const u32 cursor_formats[] = { - DRM_FORMAT_ARGB8888 -}; - -static int get_plane_formats(const struct drm_plane *plane, - const struct dc_plane_cap *plane_cap, - uint32_t *formats, int max_formats) -{ - int i, num_formats = 0; - - /* - * TODO: Query support for each group of formats directly from - * DC plane caps. This will require adding more formats to the - * caps list. - */ - - switch (plane->type) { - case DRM_PLANE_TYPE_PRIMARY: - for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = rgb_formats[i]; - } - - if (plane_cap && plane_cap->pixel_format_support.nv12) - formats[num_formats++] = DRM_FORMAT_NV12; - if (plane_cap && plane_cap->pixel_format_support.p010) - formats[num_formats++] = DRM_FORMAT_P010; - if (plane_cap && plane_cap->pixel_format_support.fp16) { - formats[num_formats++] = DRM_FORMAT_XRGB16161616F; - formats[num_formats++] = DRM_FORMAT_ARGB16161616F; - formats[num_formats++] = DRM_FORMAT_XBGR16161616F; - formats[num_formats++] = DRM_FORMAT_ABGR16161616F; - } - break; - - case DRM_PLANE_TYPE_OVERLAY: - for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = overlay_formats[i]; - } - break; - - case DRM_PLANE_TYPE_CURSOR: - for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = cursor_formats[i]; - } - break; - } - - return num_formats; -} - -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap) -{ - uint32_t formats[32]; - int num_formats; - int res = -EPERM; - unsigned int supported_rotations; - uint64_t *modifiers = NULL; - - num_formats = get_plane_formats(plane, plane_cap, formats, - ARRAY_SIZE(formats)); - - res = get_plane_modifiers(dm->adev, plane->type, &modifiers); - if (res) - return res; - - if (modifiers == NULL) - adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; - - res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, - &dm_plane_funcs, formats, num_formats, - modifiers, plane->type, NULL); - kfree(modifiers); - if (res) - return res; - - if (plane->type == DRM_PLANE_TYPE_OVERLAY && - plane_cap && plane_cap->per_pixel_alpha) { - unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | - BIT(DRM_MODE_BLEND_PREMULTI) | - BIT(DRM_MODE_BLEND_COVERAGE); - - drm_plane_create_alpha_property(plane); - drm_plane_create_blend_mode_property(plane, blend_caps); - } - - if (plane->type == DRM_PLANE_TYPE_PRIMARY && - plane_cap && - (plane_cap->pixel_format_support.nv12 || - plane_cap->pixel_format_support.p010)) { - /* 
This only affects YUV formats. */ - drm_plane_create_color_properties( - plane, - BIT(DRM_COLOR_YCBCR_BT601) | - BIT(DRM_COLOR_YCBCR_BT709) | - BIT(DRM_COLOR_YCBCR_BT2020), - BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | - BIT(DRM_COLOR_YCBCR_FULL_RANGE), - DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); - } - - supported_rotations = - DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | - DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; - - if (dm->adev->asic_type >= CHIP_BONAIRE && - plane->type != DRM_PLANE_TYPE_CURSOR) - drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, - supported_rotations); - - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - - /* Create (reset) the plane state */ - if (plane->funcs->reset) - plane->funcs->reset(plane); - - return 0; -} - -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t crtc_index) -{ - struct amdgpu_crtc *acrtc = NULL; - struct drm_plane *cursor_plane; - - int res = -ENOMEM; - - cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); - if (!cursor_plane) - goto fail; - - cursor_plane->type = DRM_PLANE_TYPE_CURSOR; - res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); - - acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); - if (!acrtc) - goto fail; - - res = drm_crtc_init_with_planes( - dm->ddev, - &acrtc->base, - plane, - cursor_plane, - &amdgpu_dm_crtc_funcs, NULL); - - if (res) - goto fail; - - drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); - - /* Create (reset) the plane state */ - if (acrtc->base.funcs->reset) - acrtc->base.funcs->reset(&acrtc->base); - - acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; - acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size; - - acrtc->crtc_id = crtc_index; - acrtc->base.enabled = false; - acrtc->otg_inst = -1; - - dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, - true, MAX_COLOR_LUT_ENTRIES); - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - - return 0; - -fail: - kfree(acrtc); - kfree(cursor_plane); - return res; -} - - static int to_drm_connector_type(enum signal_type st) { switch (st) { @@ -9084,114 +7305,6 @@ static void remove_stream(struct amdgpu_device *adev, acrtc->enabled = false; } -static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, - struct dc_cursor_position *position) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int x, y; - int xorigin = 0, yorigin = 0; - - if (!crtc || !plane->state->fb) - return 0; - - if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || - (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { - DRM_ERROR("%s: bad cursor width or height %d x %d\n", - __func__, - plane->state->crtc_w, - plane->state->crtc_h); - return -EINVAL; - } - - x = plane->state->crtc_x; - y = plane->state->crtc_y; - - if (x <= -amdgpu_crtc->max_cursor_width || - y <= -amdgpu_crtc->max_cursor_height) - return 0; - - if (x < 0) { - xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { - yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); - y = 0; - } - position->enable = true; - position->translate_by_source = true; - position->x = x; - position->y = y; - position->x_hotspot = xorigin; - position->y_hotspot = yorigin; - - return 0; -} - -static void handle_cursor_update(struct drm_plane *plane, - struct drm_plane_state *old_plane_state) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct 
amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); - struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; - struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - uint64_t address = afb ? afb->address : 0; - struct dc_cursor_position position = {0}; - struct dc_cursor_attributes attributes; - int ret; - - if (!plane->state->fb && !old_plane_state->fb) - return; - - DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", - __func__, - amdgpu_crtc->crtc_id, - plane->state->crtc_w, - plane->state->crtc_h); - - ret = get_cursor_position(plane, crtc, &position); - if (ret) - return; - - if (!position.enable) { - /* turn off cursor */ - if (crtc_state && crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - dc_stream_set_cursor_position(crtc_state->stream, - &position); - mutex_unlock(&adev->dm.dc_lock); - } - return; - } - - amdgpu_crtc->cursor_width = plane->state->crtc_w; - amdgpu_crtc->cursor_height = plane->state->crtc_h; - - memset(&attributes, 0, sizeof(attributes)); - attributes.address.high_part = upper_32_bits(address); - attributes.address.low_part = lower_32_bits(address); - attributes.width = plane->state->crtc_w; - attributes.height = plane->state->crtc_h; - attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; - attributes.rotation_angle = 0; - attributes.attribute_flags.value = 0; - - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; - - if (crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - if (!dc_stream_set_cursor_attributes(crtc_state->stream, - &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); - - if (!dc_stream_set_cursor_position(crtc_state->stream, - &position)) - DRM_ERROR("DC failed to set cursor position\n"); - mutex_unlock(&adev->dm.dc_lock); - } -} - static void prepare_flip_isr(struct amdgpu_crtc *acrtc) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7bd750e9f891..90b306a1dd68 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -571,6 +571,14 @@ struct dsc_preferred_settings { bool dsc_force_disable_passthrough; }; +enum mst_progress_status { + MST_STATUS_DEFAULT = 0, + MST_PROBE = BIT(0), + MST_REMOTE_EDID = BIT(1), + MST_ALLOCATE_NEW_PAYLOAD = BIT(2), + MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3), +}; + struct amdgpu_dm_connector { struct drm_connector base; @@ -623,8 +631,20 @@ struct amdgpu_dm_connector { struct drm_display_mode freesync_vid_base; int psr_skip_count; + + /* Record progress status of mst*/ + uint8_t mst_status; }; +static inline void amdgpu_dm_set_mst_status(uint8_t *status, + uint8_t flags, bool set) +{ + if (set) + *status |= flags; + else + *status &= ~flags; +} + #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) extern const struct amdgpu_ip_block_version dm_ip_block; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c new file mode 100644 index 000000000000..594fe8a4d02b --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
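The new amdgpu_dm_set_mst_status() helper above is a plain set/clear of the bits in enum mst_progress_status. A hypothetical call sequence, purely for illustration:

	/* Probing finished and the remote EDID was fetched: */
	amdgpu_dm_set_mst_status(&aconnector->mst_status,
				 MST_PROBE | MST_REMOTE_EDID, true);

	/* On unplug, drop everything back to MST_STATUS_DEFAULT: */
	amdgpu_dm_set_mst_status(&aconnector->mst_status,
				 MST_PROBE | MST_REMOTE_EDID |
				 MST_ALLOCATE_NEW_PAYLOAD |
				 MST_CLEAR_ALLOCATED_PAYLOAD, false);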
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include <drm/drm_vblank.h> +#include <drm/drm_atomic_helper.h> + +#include "dc.h" +#include "amdgpu.h" +#include "amdgpu_dm_psr.h" +#include "amdgpu_dm_crtc.h" +#include "amdgpu_dm_plane.h" +#include "amdgpu_dm_trace.h" +#include "amdgpu_dm_debugfs.h" + +void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +bool modeset_required(struct drm_crtc_state *crtc_state, + struct dc_stream_state *new_stream, + struct dc_stream_state *old_stream) +{ + return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); +} + +bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) + +{ + return acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_VARIABLE || + acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_FIXED; +} + +int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + int rc; + + irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; + + rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; + + DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", + acrtc->crtc_id, enable ? "en" : "dis", rc); + return rc; +} + +bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) +{ + return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || + dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; +} + +static void vblank_control_worker(struct work_struct *work) +{ + struct vblank_control_work *vblank_work = + container_of(work, struct vblank_control_work, work); + struct amdgpu_display_manager *dm = vblank_work->dm; + + mutex_lock(&dm->dc_lock); + + if (vblank_work->enable) + dm->active_vblank_irq_count++; + else if (dm->active_vblank_irq_count) + dm->active_vblank_irq_count--; + + dc_allow_idle_optimizations( + dm->dc, dm->active_vblank_irq_count == 0 ? 
true : false); + + DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + + /* + * Control PSR based on vblank requirements from OS + * + * If panel supports PSR SU, there's no need to disable PSR when OS is + * submitting fast atomic commits (we infer this by whether the OS + * requests vblank events). Fast atomic commits will simply trigger a + * full-frame-update (FFU); a specific case of selective-update (SU) + * where the SU region is the full hactive*vactive region. See + * fill_dc_dirty_rects(). + */ + if (vblank_work->stream && vblank_work->stream->link) { + if (vblank_work->enable) { + if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && + vblank_work->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(vblank_work->stream); + } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && + !vblank_work->stream->link->psr_settings.psr_allow_active && + vblank_work->acrtc->dm_irq_params.allow_psr_entry) { + amdgpu_dm_psr_enable(vblank_work->stream); + } + } + + mutex_unlock(&dm->dc_lock); + + dc_stream_release(vblank_work->stream); + + kfree(vblank_work); +} + +static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + struct amdgpu_display_manager *dm = &adev->dm; + struct vblank_control_work *work; + int rc = 0; + + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_vrr_active(acrtc_state)) + rc = dm_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = dm_set_vupdate_irq(crtc, false); + } + + if (rc) + return rc; + + irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + return -EBUSY; + + if (amdgpu_in_reset(adev)) + return 0; + + if (dm->vblank_control_workqueue) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; + + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } + + queue_work(dm->vblank_control_workqueue, &work->work); + } + + return 0; +} + +int dm_enable_vblank(struct drm_crtc *crtc) +{ + return dm_set_vblank(crtc, true); +} + +void dm_disable_vblank(struct drm_crtc *crtc) +{ + dm_set_vblank(crtc, false); +} + +static void dm_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct dm_crtc_state *cur = to_dm_crtc_state(state); + + /* TODO Destroy dc_stream objects are stream object is flattened */ + if (cur->stream) + dc_stream_release(cur->stream); + + + __drm_atomic_helper_crtc_destroy_state(state); + + + kfree(state); +} + +static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state, *cur; + + cur = to_dm_crtc_state(crtc->state); + + if (WARN_ON(!crtc->state)) + return NULL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); + + if (cur->stream) { + state->stream = cur->stream; + dc_stream_retain(state->stream); + } + + state->active_planes = cur->active_planes; + state->vrr_infopacket = cur->vrr_infopacket; + state->abm_level = cur->abm_level; + 
state->vrr_supported = cur->vrr_supported; + state->freesync_config = cur->freesync_config; + state->cm_has_degamma = cur->cm_has_degamma; + state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->crc_skip_count = cur->crc_skip_count; + state->mpo_requested = cur->mpo_requested; + /* TODO Duplicate dc_stream after objects are stream object is flattened */ + + return &state->base; +} + +static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) +{ + drm_crtc_cleanup(crtc); + kfree(crtc); +} + +static void dm_crtc_reset_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state; + + if (crtc->state) + dm_crtc_destroy_state(crtc, crtc->state); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (WARN_ON(!state)) + return; + + __drm_atomic_helper_crtc_reset(crtc, &state->base); +} + +#ifdef CONFIG_DEBUG_FS +static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) +{ + crtc_debugfs_init(crtc); + + return 0; +} +#endif + +/* Implemented only the options currently available for the driver */ +static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { + .reset = dm_crtc_reset_state, + .destroy = amdgpu_dm_crtc_destroy, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = dm_crtc_duplicate_state, + .atomic_destroy_state = dm_crtc_destroy_state, + .set_crc_source = amdgpu_dm_crtc_set_crc_source, + .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, + .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, + .get_vblank_counter = amdgpu_get_vblank_counter_kms, + .enable_vblank = dm_enable_vblank, + .disable_vblank = dm_disable_vblank, + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#if defined(CONFIG_DEBUG_FS) + .late_register = amdgpu_dm_crtc_late_register, +#endif +}; + +static void dm_crtc_helper_disable(struct drm_crtc *crtc) +{ +} + +static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) +{ + struct drm_atomic_state *state = new_crtc_state->state; + struct drm_plane *plane; + int num_active = 0; + + drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { + struct drm_plane_state *new_plane_state; + + /* Cursor planes are "fake". */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + if (!new_plane_state) { + /* + * The plane is enable on the CRTC and hasn't changed + * state. This means that it previously passed + * validation and is therefore enabled. + */ + num_active += 1; + continue; + } + + /* We need a framebuffer to be considered enabled. 
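A worked example of the counting rule (hypothetical plane set):

	/*
	 * A CRTC with a primary plane (fb bound), one overlay plane (fb bound)
	 * and an enabled cursor plane: the cursor is skipped as "fake", so
	 * count_crtc_active_planes() returns 2, not 3.
	 */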
*/ + num_active += (new_plane_state->fb != NULL); + } + + return num_active; +} + +static void dm_update_crtc_active_planes(struct drm_crtc *crtc, + struct drm_crtc_state *new_crtc_state) +{ + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + + dm_new_crtc_state->active_planes = 0; + + if (!dm_new_crtc_state->stream) + return; + + dm_new_crtc_state->active_planes = + count_crtc_active_planes(new_crtc_state); +} + +static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, + crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dc *dc = adev->dm.dc; + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + int ret = -EINVAL; + + trace_amdgpu_dm_crtc_atomic_check(crtc_state); + + dm_update_crtc_active_planes(crtc, crtc_state); + + if (WARN_ON(unlikely(!dm_crtc_state->stream && + modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { + return ret; + } + + /* + * We require the primary plane to be enabled whenever the CRTC is, otherwise + * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other + * planes are disabled, which is not supported by the hardware. And there is legacy + * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. + */ + if (crtc_state->enable && + !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { + DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); + return -EINVAL; + } + + /* In some use cases, like reset, no stream is attached */ + if (!dm_crtc_state->stream) + return 0; + + if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) + return 0; + + DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); + return ret; +} + +static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { + .disable = dm_crtc_helper_disable, + .atomic_check = dm_crtc_helper_atomic_check, + .mode_fixup = dm_crtc_helper_mode_fixup, + .get_scanout_position = amdgpu_crtc_get_scanout_position, +}; + +int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + uint32_t crtc_index) +{ + struct amdgpu_crtc *acrtc = NULL; + struct drm_plane *cursor_plane; + + int res = -ENOMEM; + + cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); + if (!cursor_plane) + goto fail; + + cursor_plane->type = DRM_PLANE_TYPE_CURSOR; + res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); + + acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); + if (!acrtc) + goto fail; + + res = drm_crtc_init_with_planes( + dm->ddev, + &acrtc->base, + plane, + cursor_plane, + &amdgpu_dm_crtc_funcs, NULL); + + if (res) + goto fail; + + drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); + + /* Create (reset) the plane state */ + if (acrtc->base.funcs->reset) + acrtc->base.funcs->reset(&acrtc->base); + + acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; + acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size; + + acrtc->crtc_id = crtc_index; + acrtc->base.enabled = false; + acrtc->otg_inst = -1; + + dm->adev->mode_info.crtcs[crtc_index] = acrtc; + drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, 
MAX_COLOR_LEGACY_LUT_ENTRIES); + + return 0; + +fail: + kfree(acrtc); + kfree(cursor_plane); + return res; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h new file mode 100644 index 000000000000..1ac8692354cf --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_CRTC_H__ +#define __AMDGPU_DM_CRTC_H__ + +void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc); + +bool modeset_required(struct drm_crtc_state *crtc_state, + struct dc_stream_state *new_stream, + struct dc_stream_state *old_stream); + +int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable); + +bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc); + +bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state); + +int dm_enable_vblank(struct drm_crtc *crtc); + +void dm_disable_vblank(struct drm_crtc *crtc); + +int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + uint32_t link_index); + +#endif + + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index a1f40d0cd41c..aa4edf182095 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -50,6 +50,13 @@ struct dmub_debugfs_trace_entry { uint32_t param1; }; +static const char *const mst_progress_status[] = { + "probe", + "remote_edid", + "allocate_new_payload", + "clear_allocated_payload", +}; + /* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array * * Function takes in attributes passed to debugfs write entry @@ -1256,14 +1263,22 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -EINVAL; } + kfree(wr_buf); + if (param_nums <= 0) { DRM_DEBUG_DRIVER("user data not be read\n"); - kfree(wr_buf); + return -EINVAL; + } + + mutex_lock(&aconnector->hpd_lock); + + /* Not supported for MST end devices */ + if (aconnector->mst_port) { + mutex_unlock(&aconnector->hpd_lock); return -EINVAL; } if (param[0] == 1) { - mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type) && new_connection_type != dc_connection_none
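The mst_progress_status[] table added above is decoded bit-by-bit by dp_mst_progress_status_show() further down: entry i in the table corresponds to BIT(i) in aconnector->mst_status, and a status of 0 (MST_STATUS_DEFAULT) prints "disabled". A self-contained sketch of that decode with an illustrative status value; the kernel loop writes ARRAY_SIZE out longhand as sizeof(mst_progress_status)/sizeof(char *):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n)		(1u << (n))
#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

static const char *const mst_progress_status[] = {
	"probe",
	"remote_edid",
	"allocate_new_payload",
	"clear_allocated_payload",
};

int main(void)
{
	/* Pretend probing and the remote EDID read completed (bits 0 and 1). */
	uint8_t status = BIT(0) | BIT(1);
	size_t i;

	if (status == 0) {		/* MST_STATUS_DEFAULT in the driver */
		puts("disabled");
		return 0;
	}
	for (i = 0; i < ARRAY_SIZE(mst_progress_status); i++)
		printf("%s:%s\n", mst_progress_status[i],
		       (status & BIT(i)) ? "done" : "not_done");
	return 0;
}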
@@ -1300,6 +1315,10 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, amdgpu_dm_update_connector_after_detect(aconnector); + /* If the aconnector is the root node in the MST topology */ + if (aconnector->mst_mgr.mst_state == true) + reset_cur_dp_mst_topology(link); + drm_modeset_lock_all(dev); dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); @@ -1310,7 +1329,6 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, unlock: mutex_unlock(&aconnector->hpd_lock); - kfree(wr_buf); return size; } @@ -2529,6 +2547,92 @@ static int target_backlight_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Determine if the connector is an MST connector + * + * This function helps to determine whether a connector is an MST connector. + * - "root" stands for the root connector of the topology + * - "branch" stands for a branch device of the topology + * - "end" stands for a leaf node connector of the topology + * - "no" stands for a connector that is not part of an MST topology + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector + * + */ +static int dp_is_mst_connector_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct drm_dp_mst_topology_mgr *mgr = NULL; + struct drm_dp_mst_port *port = NULL; + char *role = NULL; + + mutex_lock(&aconnector->hpd_lock); + + if (aconnector->mst_mgr.mst_state) { + role = "root"; + } else if (aconnector->mst_port && + aconnector->mst_port->mst_mgr.mst_state) { + + role = "end"; + + mgr = &aconnector->mst_port->mst_mgr; + port = aconnector->port; + + drm_modeset_lock(&mgr->base.lock, NULL); + if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING && + port->mcs) + role = "branch"; + drm_modeset_unlock(&mgr->base.lock); + + } else { + role = "no"; + } + + seq_printf(m, "%s\n", role); + + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} + +/* + * function description: Read out the MST progress status + * + * This function helps to determine the MST progress status of + * an MST connector. + * + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status + * + */ +static int dp_mst_progress_status_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_device *adev = drm_to_adev(connector->dev); + int i; + + mutex_lock(&aconnector->hpd_lock); + mutex_lock(&adev->dm.dc_lock); + + if (aconnector->mst_status == MST_STATUS_DEFAULT) { + seq_puts(m, "disabled\n"); + } else { + for (i = 0; i < sizeof(mst_progress_status)/sizeof(char *); i++) + seq_printf(m, "%s:%s\n", + mst_progress_status[i], + aconnector->mst_status & BIT(i) ?
"done" : "not_done"); + } + + mutex_unlock(&adev->dm.dc_lock); + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2538,6 +2642,8 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); +DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); +DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2681,6 +2787,8 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, + {"is_mst_connector", &dp_is_mst_connector_fops}, + {"mst_progress_status", &dp_mst_progress_status_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index fbb252afb494..6202e31c7e3a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -302,7 +302,7 @@ static void event_property_update(struct work_struct *work) mutex_lock(&hdcp_work->mutex); - if (aconnector->base.state->commit) { + if (aconnector->base.state && aconnector->base.state->commit) { ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ); if (ret == 0) { @@ -311,18 +311,26 @@ static void event_property_update(struct work_struct *work) } } - if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { - if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 && - hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 && - hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - } else { - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED); + if (aconnector->base.state) { + if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { + if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE0 && + hdcp_work->encryption_status <= + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + else if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE1 && + hdcp_work->encryption_status == + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + } else { + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_DESIRED); + } } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } @@ -495,7 +503,9 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1, (!!aconnector->base.state) ? 
aconnector->base.state->hdcp_content_type : -1); - hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false); + if (conn_state) + hdcp_update_display(hdcp_work, link_index, aconnector, + conn_state->hdcp_content_type, false); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 137645d40b72..d66e3cd64ebd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -312,6 +312,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; + enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; + enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -325,8 +327,20 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - /* It's OK for this to fail */ - drm_dp_update_payload_part2(mst_mgr); + if (!enable) { + set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; + clr_flag = MST_ALLOCATE_NEW_PAYLOAD; + } + + if (drm_dp_update_payload_part2(mst_mgr)) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + set_flag, false); + } else { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + set_flag, true); + amdgpu_dm_set_mst_status(&aconnector->mst_status, + clr_flag, false); + } if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1f722309cfdd..2e74ccf7df5b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -179,6 +179,8 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) aconnector->dc_sink = NULL; aconnector->edid = NULL; } + + aconnector->mst_status = MST_STATUS_DEFAULT; drm_modeset_unlock(&root->mst_mgr.base.lock); } @@ -279,6 +281,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); if (!edid) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, false); + drm_connector_update_edid_property( &aconnector->base, NULL); @@ -309,6 +314,8 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) } aconnector->edid = edid; + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, true); } if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) { @@ -430,6 +437,10 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; aconnector->edid = NULL; + + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD, + false); } return connection_status; @@ -526,6 +537,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, connector = &aconnector->base; aconnector->port = port; aconnector->mst_port = master; + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_PROBE, true); if (drm_connector_init( dev, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c new file mode 100644 index 000000000000..8cd25b2ea0dc --- /dev/null +++ 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -0,0 +1,1646 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_blend.h> +#include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_fourcc.h> + +#include "amdgpu.h" +#include "dal_asic_id.h" +#include "amdgpu_display.h" +#include "amdgpu_dm_trace.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" + +/* + * TODO: these are currently initialized to rgb formats only. + * For future use cases we should either initialize them dynamically based on + * plane capabilities, or initialize this array to all formats, so internal drm + * check will succeed, and let DC implement proper check + */ +static const uint32_t rgb_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB16161616, + DRM_FORMAT_XBGR16161616, + DRM_FORMAT_ARGB16161616, + DRM_FORMAT_ABGR16161616, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565, +}; + +static const uint32_t overlay_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565 +}; + +static const u32 cursor_formats[] = { + DRM_FORMAT_ARGB8888 +}; + +enum dm_micro_swizzle { + MICRO_SWIZZLE_Z = 0, + MICRO_SWIZZLE_S = 1, + MICRO_SWIZZLE_D = 2, + MICRO_SWIZZLE_R = 3 +}; + +const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +{ + return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); +} + +void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value) +{ + *per_pixel_alpha = false; + *pre_multiplied_alpha = true; + *global_alpha = false; + *global_alpha_value = 0xff; + + if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || + plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { + static const uint32_t alpha_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_ABGR8888, + }; + uint32_t format = plane_state->fb->format->format; + unsigned int i; + 
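The loop that follows is a plain membership test against the three alpha-capable formats, and the tail of the function narrows the 16-bit DRM plane alpha to the 8-bit global alpha DC consumes (plane_state->alpha >> 8). A compilable sketch of both steps; the fourcc_code() packing matches drm_fourcc.h, while the example format and alpha value are made up:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* fourcc encoding as defined by drm_fourcc.h */
#define fourcc_code(a, b, c, d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | \
				 ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
#define DRM_FORMAT_ARGB8888	fourcc_code('A', 'R', '2', '4')
#define DRM_FORMAT_RGBA8888	fourcc_code('R', 'A', '2', '4')
#define DRM_FORMAT_ABGR8888	fourcc_code('A', 'B', '2', '4')
#define ARRAY_SIZE(a)		(sizeof(a) / sizeof((a)[0]))

int main(void)
{
	static const uint32_t alpha_formats[] = {
		DRM_FORMAT_ARGB8888, DRM_FORMAT_RGBA8888, DRM_FORMAT_ABGR8888,
	};
	uint32_t format = DRM_FORMAT_ARGB8888;	/* example plane format */
	uint16_t plane_alpha = 0x8000;		/* drm_plane_state.alpha is 16-bit */
	bool per_pixel_alpha = false;
	size_t i;

	for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i)
		if (format == alpha_formats[i])
			per_pixel_alpha = true;

	/* Dropping the low byte maps 0xffff -> 0xff (opaque), 0x8000 -> 0x80. */
	uint8_t global_alpha_value = plane_alpha >> 8;

	printf("per_pixel_alpha=%d global_alpha=0x%02x\n",
	       per_pixel_alpha, global_alpha_value);
	return 0;
}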
+ for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) { + if (format == alpha_formats[i]) { + *per_pixel_alpha = true; + break; + } + } + + if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + *pre_multiplied_alpha = false; + } + + if (plane_state->alpha < 0xffff) { + *global_alpha = true; + *global_alpha_value = plane_state->alpha >> 8; + } +} + +static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod) +{ + if (!*mods) + return; + + if (*cap - *size < 1) { + uint64_t new_cap = *cap * 2; + uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); + + if (!new_mods) { + kfree(*mods); + *mods = NULL; + return; + } + + memcpy(new_mods, *mods, sizeof(uint64_t) * *size); + kfree(*mods); + *mods = new_mods; + *cap = new_cap; + } + + (*mods)[*size] = mod; + *size += 1; +} + +bool modifier_has_dcc(uint64_t modifier) +{ + return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); +} + +unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) +{ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return 0; + + return AMD_FMT_MOD_GET(TILE, modifier); +} + +static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, + uint64_t tiling_flags) +{ + /* Fill GFX8 params */ + if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { + unsigned int bankw, bankh, mtaspect, tile_split, num_banks; + + bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); + bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); + mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); + tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); + num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); + + /* XXX fix me for VI */ + tiling_info->gfx8.num_banks = num_banks; + tiling_info->gfx8.array_mode = + DC_ARRAY_2D_TILED_THIN1; + tiling_info->gfx8.tile_split = tile_split; + tiling_info->gfx8.bank_width = bankw; + tiling_info->gfx8.bank_height = bankh; + tiling_info->gfx8.tile_aspect = mtaspect; + tiling_info->gfx8.tile_mode = + DC_ADDR_SURF_MICRO_TILING_DISPLAY; + } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) + == DC_ARRAY_1D_TILED_THIN1) { + tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; + } + + tiling_info->gfx8.pipe_config = + AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); +} + +static void fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info) +{ + /* Fill GFX9 params */ + tiling_info->gfx9.num_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + tiling_info->gfx9.num_banks = + adev->gfx.config.gb_addr_config_fields.num_banks; + tiling_info->gfx9.pipe_interleave = + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; + tiling_info->gfx9.num_shader_engines = + adev->gfx.config.gb_addr_config_fields.num_se; + tiling_info->gfx9.max_compressed_frags = + adev->gfx.config.gb_addr_config_fields.max_compress_frags; + tiling_info->gfx9.num_rb_per_se = + adev->gfx.config.gb_addr_config_fields.num_rb_per_se; + tiling_info->gfx9.shaderEnable = 1; + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; +} + +static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info, + uint64_t modifier) +{ + unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); + unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); + unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); 
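AMD_FMT_MOD_GET above simply extracts shifted, masked bit-fields from the 64-bit format modifier, and AMD_FMT_MOD_SET (used throughout the add_*_modifiers helpers below) is the matching pack step. A standalone sketch of that scheme; the shifts and widths here are invented for illustration, the real field layout lives in include/uapi/drm/drm_fourcc.h:

#include <assert.h>
#include <stdint.h>

/* Hypothetical field layout; the real values differ. */
#define FIELD_SHIFT_PIPE_XOR_BITS	0
#define FIELD_MASK_PIPE_XOR_BITS	0x7ull
#define FIELD_SHIFT_PACKERS		3
#define FIELD_MASK_PACKERS		0x7ull

#define MOD_SET(field, val) \
	(((uint64_t)(val) & FIELD_MASK_##field) << FIELD_SHIFT_##field)
#define MOD_GET(field, mod) \
	(((mod) >> FIELD_SHIFT_##field) & FIELD_MASK_##field)

int main(void)
{
	/* Pack two log2 counts into one modifier-style u64, then read back. */
	uint64_t mod = MOD_SET(PIPE_XOR_BITS, 5) | MOD_SET(PACKERS, 2);

	assert(MOD_GET(PIPE_XOR_BITS, mod) == 5);
	assert(MOD_GET(PACKERS, mod) == 2);

	/* The driver turns such log2 fields back into counts with 1u << x. */
	assert((1u << MOD_GET(PIPE_XOR_BITS, mod)) == 32);
	return 0;
}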
+ unsigned int pipes_log2; + + pipes_log2 = min(5u, mod_pipe_xor_bits); + + fill_gfx9_tiling_info_from_device(adev, tiling_info); + + if (!IS_AMD_FMT_MOD(modifier)) + return; + + tiling_info->gfx9.num_pipes = 1u << pipes_log2; + tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); + + if (adev->family >= AMDGPU_FAMILY_NV) { + tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; + } else { + tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; + + /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */ + } +} + +static int validate_dcc(struct amdgpu_device *adev, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const union dc_tiling_info *tiling_info, + const struct dc_plane_dcc_param *dcc, + const struct dc_plane_address *address, + const struct plane_size *plane_size) +{ + struct dc *dc = adev->dm.dc; + struct dc_dcc_surface_param input; + struct dc_surface_dcc_cap output; + + memset(&input, 0, sizeof(input)); + memset(&output, 0, sizeof(output)); + + if (!dcc->enable) + return 0; + + if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || + !dc->cap_funcs.get_dcc_compression_cap) + return -EINVAL; + + input.format = format; + input.surface_size.width = plane_size->surface_size.width; + input.surface_size.height = plane_size->surface_size.height; + input.swizzle_mode = tiling_info->gfx9.swizzle; + + if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) + input.scan = SCAN_DIRECTION_HORIZONTAL; + else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) + input.scan = SCAN_DIRECTION_VERTICAL; + + if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) + return -EINVAL; + + if (!output.capable) + return -EINVAL; + + if (dcc->independent_64b_blks == 0 && + output.grph.rgb.independent_64b_blks != 0) + return -EINVAL; + + return 0; +} + +static int fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const struct plane_size *plane_size, + union dc_tiling_info *tiling_info, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + const bool force_disable_dcc) +{ + const uint64_t modifier = afb->base.modifier; + int ret = 0; + + fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); + tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); + + if (modifier_has_dcc(modifier) && !force_disable_dcc) { + uint64_t dcc_address = afb->address + afb->base.offsets[1]; + bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); + bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); + + dcc->enable = 1; + dcc->meta_pitch = afb->base.pitches[1]; + dcc->independent_64b_blks = independent_64b_blks; + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { + if (independent_64b_blks && independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl; + else if (independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_128b; + else if (independent_64b_blks && !independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } else { + if (independent_64b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } + + address->grph.meta_addr.low_part = lower_32_bits(dcc_address); + address->grph.meta_addr.high_part = upper_32_bits(dcc_address); + } 
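The meta_addr assignment just above splits one 64-bit DCC metadata address across two 32-bit register halves. A self-contained round-trip sketch, reimplementing the kernel's lower_32_bits()/upper_32_bits() in userspace with an arbitrary example address:

#include <assert.h>
#include <stdint.h>

static uint32_t lower_32_bits(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32_bits(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	/* In the driver this is afb->address + afb->base.offsets[1]. */
	uint64_t dcc_address = 0x0000123456789abcULL;
	uint32_t low = lower_32_bits(dcc_address);	/* meta_addr.low_part */
	uint32_t high = upper_32_bits(dcc_address);	/* meta_addr.high_part */

	/* Reassembling the halves must give back the original address. */
	assert((((uint64_t)high << 32) | low) == dcc_address);
	return 0;
}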
+ + ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); + if (ret) + drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); + + return ret; +} + +static void add_gfx10_1_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +static void add_gfx9_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pipe_xor_bits = min(8, pipes + + ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); + int bank_xor_bits = min(8 - pipe_xor_bits, + ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); + int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + + ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); + + + if (adev->family == AMDGPU_FAMILY_RV) { + /* Raven2 and later */ + bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; + + /* + * No _D DCC swizzles yet because we only allow 32bpp, which + * doesn't support _D on DCN + */ + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + 
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. + */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. 
+ */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + } +} + +static void add_gfx10_3_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + 
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +static void add_gfx11_modifiers(struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int num_pipes = 0; + int pipe_xor_bits = 0; + int num_pkrs = 0; + int pkrs = 0; + u32 gb_addr_config; + u8 i = 0; + unsigned swizzle_r_x; + uint64_t modifier_r_x; + uint64_t modifier_dcc_best; + uint64_t modifier_dcc_4k; + + /* TODO: GFX11 IP HW init hasn't finished and we get zero if we read from + * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} + */ + gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + ASSERT(gb_addr_config != 0); + + num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); + pkrs = ilog2(num_pkrs); + num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); + pipe_xor_bits = ilog2(num_pipes); + + for (i = 0; i < 2; i++) { + /* Insert the best one first. */ + /* R_X swizzle modes are the best for rendering and DCC requires them. */ + if (num_pipes > 16) + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; + else + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; + + modifier_r_x = AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(TILE, swizzle_r_x) | + AMD_FMT_MOD_SET(PACKERS, pkrs); + + /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ + modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); + + /* DCC settings for 4K and greater resolutions. (required by display hw) */ + modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); + + add_modifier(mods, size, capacity, modifier_dcc_best); + add_modifier(mods, size, capacity, modifier_dcc_4k); + + add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + + add_modifier(mods, size, capacity, modifier_r_x); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); +} + +static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) +{ + uint64_t size = 0, capacity = 128; + *mods = NULL; + + /* We have not hooked up any pre-GFX9 modifiers. */ + if (adev->family < AMDGPU_FAMILY_AI) + return 0; + + *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); + + if (plane_type == DRM_PLANE_TYPE_CURSOR) { + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); + return *mods ?
0 : -ENOMEM; + } + + switch (adev->family) { + case AMDGPU_FAMILY_AI: + case AMDGPU_FAMILY_RV: + add_gfx9_modifiers(adev, mods, &size, &capacity); + break; + case AMDGPU_FAMILY_NV: + case AMDGPU_FAMILY_VGH: + case AMDGPU_FAMILY_YC: + case AMDGPU_FAMILY_GC_10_3_6: + case AMDGPU_FAMILY_GC_10_3_7: + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + add_gfx10_3_modifiers(adev, mods, &size, &capacity); + else + add_gfx10_1_modifiers(adev, mods, &size, &capacity); + break; + case AMDGPU_FAMILY_GC_11_0_0: + case AMDGPU_FAMILY_GC_11_0_2: + add_gfx11_modifiers(adev, mods, &size, &capacity); + break; + } + + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); + + /* INVALID marks the end of the list. */ + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); + + if (!*mods) + return -ENOMEM; + + return 0; +} + +static int get_plane_formats(const struct drm_plane *plane, + const struct dc_plane_cap *plane_cap, + uint32_t *formats, int max_formats) +{ + int i, num_formats = 0; + + /* + * TODO: Query support for each group of formats directly from + * DC plane caps. This will require adding more formats to the + * caps list. + */ + + switch (plane->type) { + case DRM_PLANE_TYPE_PRIMARY: + for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = rgb_formats[i]; + } + + if (plane_cap && plane_cap->pixel_format_support.nv12) + formats[num_formats++] = DRM_FORMAT_NV12; + if (plane_cap && plane_cap->pixel_format_support.p010) + formats[num_formats++] = DRM_FORMAT_P010; + if (plane_cap && plane_cap->pixel_format_support.fp16) { + formats[num_formats++] = DRM_FORMAT_XRGB16161616F; + formats[num_formats++] = DRM_FORMAT_ARGB16161616F; + formats[num_formats++] = DRM_FORMAT_XBGR16161616F; + formats[num_formats++] = DRM_FORMAT_ABGR16161616F; + } + break; + + case DRM_PLANE_TYPE_OVERLAY: + for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = overlay_formats[i]; + } + break; + + case DRM_PLANE_TYPE_CURSOR: + for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = cursor_formats[i]; + } + break; + } + + return num_formats; +} + +#ifdef CONFIG_DRM_AMD_DC_HDR +static int attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) +{ + drm_object_attach_property(&plane->base, + dm->degamma_lut_property, + 0); + drm_object_attach_property(&plane->base, + dm->degamma_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, dm->ctm_property, + 0); + drm_object_attach_property(&plane->base, dm->sdr_boost_property, + DEFAULT_SDR_BOOST); + + return 0; +} +#endif + +int fill_plane_buffer_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const uint64_t tiling_flags, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc) +{ + const struct drm_framebuffer *fb = &afb->base; + int ret; + + memset(tiling_info, 0, sizeof(*tiling_info)); + memset(plane_size, 0, sizeof(*plane_size)); + memset(dcc, 0, sizeof(*dcc)); + memset(address, 0, sizeof(*address)); + + address->tmz_surface = tmz_surface; + + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + uint64_t addr = afb->address + fb->offsets[0]; + + 
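A few lines below, surface_pitch converts the DRM framebuffer pitch from bytes to pixels with fb->pitches[0] / fb->format->cpp[0], since DC expects pitches in pixel units. A small sketch with assumed example values (XRGB8888 at 4 bytes per pixel, rows padded out to 2048 pixels):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t width_px = 1920;		/* visible width, fb->width */
	uint32_t cpp = 4;			/* bytes per pixel, fb->format->cpp[0] */
	uint32_t pitch_bytes = 2048 * 4;	/* fb->pitches[0], includes row padding */
	uint32_t pitch_px = pitch_bytes / cpp;	/* DC's plane_size->surface_pitch */

	/* Pitch may exceed the visible width whenever rows are padded. */
	assert(pitch_px >= width_px);
	return 0;
}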
plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + address->type = PLN_ADDR_TYPE_GRAPHICS; + address->grph.addr.low_part = lower_32_bits(addr); + address->grph.addr.high_part = upper_32_bits(addr); + } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { + uint64_t luma_addr = afb->address + fb->offsets[0]; + uint64_t chroma_addr = afb->address + fb->offsets[1]; + + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + plane_size->chroma_size.x = 0; + plane_size->chroma_size.y = 0; + /* TODO: set these based on surface format */ + plane_size->chroma_size.width = fb->width / 2; + plane_size->chroma_size.height = fb->height / 2; + + plane_size->chroma_pitch = + fb->pitches[1] / fb->format->cpp[1]; + + address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; + address->video_progressive.luma_addr.low_part = + lower_32_bits(luma_addr); + address->video_progressive.luma_addr.high_part = + upper_32_bits(luma_addr); + address->video_progressive.chroma_addr.low_part = + lower_32_bits(chroma_addr); + address->video_progressive.chroma_addr.high_part = + upper_32_bits(chroma_addr); + } + + if (adev->family >= AMDGPU_FAMILY_AI) { + ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, + rotation, plane_size, + tiling_info, dcc, + address, + force_disable_dcc); + if (ret) + return ret; + } else { + fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); + } + + return 0; +} + +static int dm_plane_helper_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; + uint32_t domain; + int r; + + if (!new_state->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + + afb = to_amdgpu_framebuffer(new_state->fb); + obj = new_state->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + + r = amdgpu_bo_reserve(rbo, true); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + goto error_unlock; + } + + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_supported_domains(adev, rbo->flags); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + goto error_unlock; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", rbo); + goto error_unpin; + } + + r = drm_gem_plane_helper_prepare_fb(plane, new_state); + if (unlikely(r != 0)) + goto error_unpin; + + amdgpu_bo_unreserve(rbo); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + /** + * We don't do surface updates on planes that have been newly created, + * but we also don't have the afb->address during atomic check. + * + * Fill in buffer attributes depending on the address here, but only on + * newly created planes since they're not being used by DC yet and this + * won't modify global state. 
+ */ + dm_plane_state_old = to_dm_plane_state(plane->state); + dm_plane_state_new = to_dm_plane_state(new_state); + + if (dm_plane_state_new->dc_state && + dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { + struct dc_plane_state *plane_state = + dm_plane_state_new->dc_state; + bool force_disable_dcc = !plane_state->dcc.enable; + + fill_plane_buffer_attributes( + adev, afb, plane_state->format, plane_state->rotation, + afb->tiling_flags, + &plane_state->tiling_info, &plane_state->plane_size, + &plane_state->dcc, &plane_state->address, + afb->tmz_surface, force_disable_dcc); + } + + return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; +} + +static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct amdgpu_bo *rbo; + int r; + + if (!old_state->fb) + return; + + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + struct dc *dc = adev->dm.dc; + /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ + struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + + switch (fb->format->format) { + case DRM_FORMAT_P010: + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV21: + *max_upscale = plane_cap->max_upscale_factor.nv12; + *min_downscale = plane_cap->max_downscale_factor.nv12; + break; + + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + *max_upscale = plane_cap->max_upscale_factor.fp16; + *min_downscale = plane_cap->max_downscale_factor.fp16; + break; + + default: + *max_upscale = plane_cap->max_upscale_factor.argb8888; + *min_downscale = plane_cap->max_downscale_factor.argb8888; + break; + } + + /* + * A factor of 1 in the plane_cap means to not allow scaling, ie. use a + * scaling factor of 1.0 == 1000 units. + */ + if (*max_upscale == 1) + *max_upscale = 1000; + + if (*min_downscale == 1) + *min_downscale = 1000; +} + +int dm_plane_helper_check_state(struct drm_plane_state *state, + struct drm_crtc_state *new_crtc_state) +{ + struct drm_framebuffer *fb = state->fb; + int min_downscale, max_upscale; + int min_scale = 0; + int max_scale = INT_MAX; + + /* Plane enabled? Validate viewport and get scaling factors from plane caps. */ + if (fb && state->crtc) { + /* Validate viewport to cover the case when only the position changes */ + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { + int viewport_width = state->crtc_w; + int viewport_height = state->crtc_h; + + if (state->crtc_x < 0) + viewport_width += state->crtc_x; + else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) + viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; + + if (state->crtc_y < 0) + viewport_height += state->crtc_y; + else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) + viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; + + if (viewport_width < 0 || viewport_height < 0) { + DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); + return -EINVAL; + } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. 
*/ + DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); + return -EINVAL; + } else if (viewport_height < MIN_VIEWPORT_SIZE) { + DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); + return -EINVAL; + } + + } + + /* Get min/max allowed scaling factors from plane caps. */ + get_min_max_dc_plane_scaling(state->crtc->dev, fb, + &min_downscale, &max_upscale); + /* + * Convert to drm convention: 16.16 fixed point, instead of dc's + * 1.0 == 1000. Also drm scaling is src/dst instead of dc's + * dst/src, so min_scale = 1.0 / max_upscale, etc. + */ + min_scale = (1000 << 16) / max_upscale; + max_scale = (1000 << 16) / min_downscale; + } + + return drm_atomic_helper_check_plane_state( + state, new_crtc_state, min_scale, max_scale, true, true); +} + +int fill_dc_scaling_info(struct amdgpu_device *adev, + const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info) +{ + int scale_w, scale_h, min_downscale, max_upscale; + + memset(scaling_info, 0, sizeof(*scaling_info)); + + /* Source is fixed 16.16 but we ignore mantissa for now... */ + scaling_info->src_rect.x = state->src_x >> 16; + scaling_info->src_rect.y = state->src_y >> 16; + + /* + * For reasons we don't (yet) fully understand a non-zero + * src_y coordinate into an NV12 buffer can cause a + * system hang on DCN1x. + * To avoid hangs (and maybe be overly cautious) + * let's reject both non-zero src_x and src_y. + * + * We currently know of only one use-case to reproduce a + * scenario with non-zero src_x and src_y for NV12, which + * is to gesture the YouTube Android app into full screen + * on ChromeOS. + */ + if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || + (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && + (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && + (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + return -EINVAL; + + scaling_info->src_rect.width = state->src_w >> 16; + if (scaling_info->src_rect.width == 0) + return -EINVAL; + + scaling_info->src_rect.height = state->src_h >> 16; + if (scaling_info->src_rect.height == 0) + return -EINVAL; + + scaling_info->dst_rect.x = state->crtc_x; + scaling_info->dst_rect.y = state->crtc_y; + + if (state->crtc_w == 0) + return -EINVAL; + + scaling_info->dst_rect.width = state->crtc_w; + + if (state->crtc_h == 0) + return -EINVAL; + + scaling_info->dst_rect.height = state->crtc_h; + + /* DRM doesn't specify clipping on destination output. */ + scaling_info->clip_rect = scaling_info->dst_rect; + + /* Validate scaling per-format with DC plane caps */ + if (state->plane && state->plane->dev && state->fb) { + get_min_max_dc_plane_scaling(state->plane->dev, state->fb, + &min_downscale, &max_upscale); + } else { + min_downscale = 250; + max_upscale = 16000; + } + + scale_w = scaling_info->dst_rect.width * 1000 / + scaling_info->src_rect.width; + + if (scale_w < min_downscale || scale_w > max_upscale) + return -EINVAL; + + scale_h = scaling_info->dst_rect.height * 1000 / + scaling_info->src_rect.height; + + if (scale_h < min_downscale || scale_h > max_upscale) + return -EINVAL; + + /* + * The "scaling_quality" can be ignored for now, quality = 0 has DC + * assume reasonable defaults based on the format. 
+ */ + + return 0; +} + +static int dm_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct dc *dc = adev->dm.dc; + struct dm_plane_state *dm_plane_state; + struct dc_scaling_info scaling_info; + struct drm_crtc_state *new_crtc_state; + int ret; + + trace_amdgpu_dm_plane_atomic_check(new_plane_state); + + dm_plane_state = to_dm_plane_state(new_plane_state); + + if (!dm_plane_state->dc_state) + return 0; + + new_crtc_state = + drm_atomic_get_new_crtc_state(state, + new_plane_state->crtc); + if (!new_crtc_state) + return -EINVAL; + + ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); + if (ret) + return ret; + + ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); + if (ret) + return ret; + + if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) + return 0; + + return -EINVAL; +} + +static int dm_plane_atomic_async_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + /* Only support async updates on cursor planes. */ + if (plane->type != DRM_PLANE_TYPE_CURSOR) + return -EINVAL; + + return 0; +} + +static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, + struct dc_cursor_position *position) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + int x, y; + int xorigin = 0, yorigin = 0; + + if (!crtc || !plane->state->fb) + return 0; + + if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || + (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { + DRM_ERROR("%s: bad cursor width or height %d x %d\n", + __func__, + plane->state->crtc_w, + plane->state->crtc_h); + return -EINVAL; + } + + x = plane->state->crtc_x; + y = plane->state->crtc_y; + + if (x <= -amdgpu_crtc->max_cursor_width || + y <= -amdgpu_crtc->max_cursor_height) + return 0; + + if (x < 0) { + xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); + x = 0; + } + if (y < 0) { + yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); + y = 0; + } + position->enable = true; + position->translate_by_source = true; + position->x = x; + position->y = y; + position->x_hotspot = xorigin; + position->y_hotspot = yorigin; + + return 0; +} + +void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); + struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; + struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + uint64_t address = afb ? 
afb->address : 0; + struct dc_cursor_position position = {0}; + struct dc_cursor_attributes attributes; + int ret; + + if (!plane->state->fb && !old_plane_state->fb) + return; + + DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", + __func__, + amdgpu_crtc->crtc_id, + plane->state->crtc_w, + plane->state->crtc_h); + + ret = get_cursor_position(plane, crtc, &position); + if (ret) + return; + + if (!position.enable) { + /* turn off cursor */ + if (crtc_state && crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + dc_stream_set_cursor_position(crtc_state->stream, + &position); + mutex_unlock(&adev->dm.dc_lock); + } + return; + } + + amdgpu_crtc->cursor_width = plane->state->crtc_w; + amdgpu_crtc->cursor_height = plane->state->crtc_h; + + memset(&attributes, 0, sizeof(attributes)); + attributes.address.high_part = upper_32_bits(address); + attributes.address.low_part = lower_32_bits(address); + attributes.width = plane->state->crtc_w; + attributes.height = plane->state->crtc_h; + attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; + attributes.rotation_angle = 0; + attributes.attribute_flags.value = 0; + + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + + if (crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + if (!dc_stream_set_cursor_attributes(crtc_state->stream, + &attributes)) + DRM_ERROR("DC failed to set cursor attributes\n"); + + if (!dc_stream_set_cursor_position(crtc_state->stream, + &position)) + DRM_ERROR("DC failed to set cursor position\n"); + mutex_unlock(&adev->dm.dc_lock); + } +} + +static void dm_plane_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(state, plane); + + trace_amdgpu_dm_atomic_update_cursor(new_state); + + swap(plane->state->fb, new_state->fb); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_w = new_state->src_w; + plane->state->src_h = new_state->src_h; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_w = new_state->crtc_w; + plane->state->crtc_h = new_state->crtc_h; + + handle_cursor_update(plane, old_state); +} + +static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { + .prepare_fb = dm_plane_helper_prepare_fb, + .cleanup_fb = dm_plane_helper_cleanup_fb, + .atomic_check = dm_plane_atomic_check, + .atomic_async_check = dm_plane_atomic_async_check, + .atomic_async_update = dm_plane_atomic_async_update +}; + +static void dm_drm_plane_reset(struct drm_plane *plane) +{ + struct dm_plane_state *amdgpu_state = NULL; + + if (plane->state) + plane->funcs->atomic_destroy_state(plane, plane->state); + + amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); + WARN_ON(amdgpu_state == NULL); + + if (amdgpu_state) + __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); +#ifdef CONFIG_DRM_AMD_DC_HDR + if (amdgpu_state) + amdgpu_state->sdr_boost = DEFAULT_SDR_BOOST; +#endif +} + +static struct drm_plane_state * +dm_drm_plane_duplicate_state(struct drm_plane *plane) +{ + struct dm_plane_state *dm_plane_state, *old_dm_plane_state; + + old_dm_plane_state = to_dm_plane_state(plane->state); + dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL); + if (!dm_plane_state) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base); + + if (old_dm_plane_state->dc_state) { 
+ dm_plane_state->dc_state = old_dm_plane_state->dc_state; + dc_plane_state_retain(dm_plane_state->dc_state); + } + +#ifdef CONFIG_DRM_AMD_DC_HDR + if (dm_plane_state->degamma_lut) + drm_property_blob_get(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_get(dm_plane_state->ctm); + + dm_plane_state->sdr_boost = old_dm_plane_state->sdr_boost; +#endif + + return &dm_plane_state->base; +} + +static bool dm_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + const struct drm_format_info *info = drm_format_info(format); + struct hw_asic_id asic_id = adev->dm.dc->ctx->asic_id; + + enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; + + if (!info) + return false; + + /* + * We always have to allow these modifiers: + * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. + * 2. Not passing any modifiers is the same as explicitly passing INVALID. + */ + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_INVALID) { + return true; + } + + /* check if swizzle mode is supported by this version of DCN */ + switch (asic_id.chip_family) { + case FAMILY_SI: + case FAMILY_CI: + case FAMILY_KV: + case FAMILY_CZ: + case FAMILY_VI: + /* asics before AI do not have modifier support */ + return false; + case FAMILY_AI: + case FAMILY_RV: + case FAMILY_NV: + case FAMILY_VGH: + case FAMILY_YELLOW_CARP: + case AMDGPU_FAMILY_GC_10_3_6: + case AMDGPU_FAMILY_GC_10_3_7: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + case AMDGPU_FAMILY_GC_11_0_0: + case AMDGPU_FAMILY_GC_11_0_2: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX11_256K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + default: + ASSERT(0); /* Unknown asic */ + break; + } + + /* + * For D swizzle the canonical modifier depends on the bpp, so check + * it here. + */ + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && + adev->family >= AMDGPU_FAMILY_NV) { + if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4) + return false; + } + + if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D && + info->cpp[0] < 8) + return false; + + if (modifier_has_dcc(modifier)) { + /* Per radeonsi comments 16/64 bpp are more complicated. */ + if (info->cpp[0] != 4) + return false; + /* We support multi-planar formats, but not when combined with + * additional DCC metadata planes.
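+ * In practice this limits DCC here to single-plane 32bpp formats.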
+ */ + if (info->num_planes > 1) + return false; + } + + return true; +} + +static void dm_drm_plane_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + +#ifdef CONFIG_DRM_AMD_DC_HDR + drm_property_blob_put(dm_plane_state->degamma_lut); + drm_property_blob_put(dm_plane_state->ctm); +#endif + if (dm_plane_state->dc_state) + dc_plane_state_release(dm_plane_state->dc_state); + + drm_atomic_helper_plane_destroy_state(plane, state); +} + +#ifdef CONFIG_DRM_AMD_DC_HDR +/* copied from drm_atomic_uapi.c */ +static int atomic_replace_property_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced) +{ + struct drm_property_blob *new_blob = NULL; + + if (blob_id != 0) { + new_blob = drm_property_lookup_blob(dev, blob_id); + if (new_blob == NULL) + return -EINVAL; + + if (expected_size > 0 && + new_blob->length != expected_size) { + drm_property_blob_put(new_blob); + return -EINVAL; + } + if (expected_elem_size > 0 && + new_blob->length % expected_elem_size != 0) { + drm_property_blob_put(new_blob); + return -EINVAL; + } + } + + *replaced |= drm_property_replace_blob(blob, new_blob); + drm_property_blob_put(new_blob); + + return 0; +} + +int dm_drm_plane_set_property(struct drm_plane *plane, + struct drm_plane_state *state, + struct drm_property *property, + uint64_t val) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + int ret = 0; + bool replaced; + + if (property == adev->dm.degamma_lut_property) { + ret = atomic_replace_property_blob_from_id(adev_to_drm(adev), + &dm_plane_state->degamma_lut, + val, -1, sizeof(struct drm_color_lut), + &replaced); + } else if (property == adev->dm.ctm_property) { + ret = atomic_replace_property_blob_from_id(adev_to_drm(adev), + &dm_plane_state->ctm, + val, + sizeof(struct drm_color_ctm), -1, + &replaced); + } else if (property == adev->dm.sdr_boost_property) { + dm_plane_state->sdr_boost = val; + } else { + return -EINVAL; + } + + return ret; +} + +int dm_drm_plane_get_property(struct drm_plane *plane, + const struct drm_plane_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + + if (property == adev->dm.degamma_lut_property) { + *val = (dm_plane_state->degamma_lut) ? + dm_plane_state->degamma_lut->base.id : 0; + } else if (property == adev->dm.ctm_property) { + *val = (dm_plane_state->ctm) ? 
dm_plane_state->ctm->base.id : 0; + } else if (property == adev->dm.sdr_boost_property) { + *val = dm_plane_state->sdr_boost; + } else { + return -EINVAL; + } + + return 0; +} +#endif + +static const struct drm_plane_funcs dm_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_primary_helper_destroy, + .reset = dm_drm_plane_reset, + .atomic_duplicate_state = dm_drm_plane_duplicate_state, + .atomic_destroy_state = dm_drm_plane_destroy_state, + .format_mod_supported = dm_plane_format_mod_supported, +#ifdef CONFIG_DRM_AMD_DC_HDR + .atomic_set_property = dm_drm_plane_set_property, + .atomic_get_property = dm_drm_plane_get_property, +#endif +}; + +int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap) +{ + uint32_t formats[32]; + int num_formats; + int res = -EPERM; + unsigned int supported_rotations; + uint64_t *modifiers = NULL; + + num_formats = get_plane_formats(plane, plane_cap, formats, + ARRAY_SIZE(formats)); + + res = get_plane_modifiers(dm->adev, plane->type, &modifiers); + if (res) + return res; + + res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, + &dm_plane_funcs, formats, num_formats, + modifiers, plane->type, NULL); + kfree(modifiers); + if (res) + return res; + + if (plane->type == DRM_PLANE_TYPE_OVERLAY && + plane_cap && plane_cap->per_pixel_alpha) { + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); + + drm_plane_create_alpha_property(plane); + drm_plane_create_blend_mode_property(plane, blend_caps); + } + + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + plane_cap && + (plane_cap->pixel_format_support.nv12 || + plane_cap->pixel_format_support.p010)) { + /* This only affects YUV formats. */ + drm_plane_create_color_properties( + plane, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709) | + BIT(DRM_COLOR_YCBCR_BT2020), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + } + + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; + + if (dm->adev->asic_type >= CHIP_BONAIRE && + plane->type != DRM_PLANE_TYPE_CURSOR) + drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, + supported_rotations); + + drm_plane_helper_add(plane, &dm_plane_helper_funcs); + +#ifdef CONFIG_DRM_AMD_DC_HDR + attach_color_mgmt_properties(dm, plane); +#endif + /* Create (reset) the plane state */ + if (plane->funcs->reset) + plane->funcs->reset(plane); + + return 0; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h new file mode 100644 index 000000000000..95168c2cfa6f --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_DM_PLANE_H__ +#define __AMDGPU_DM_PLANE_H__ + +#include "dc.h" + +void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state); + +int fill_dc_scaling_info(struct amdgpu_device *adev, + const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info); + +void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale); + +int dm_plane_helper_check_state(struct drm_plane_state *state, + struct drm_crtc_state *new_crtc_state); + +bool modifier_has_dcc(uint64_t modifier); + +unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier); + +int fill_plane_buffer_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const uint64_t tiling_flags, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc); + +int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap); + +const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd); + +void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 273f8f2c8e02..b9effadfc4bb 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -25,6 +25,9 @@ DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual ifdef CONFIG_DRM_AMD_DC_DCN + +KCOV_INSTRUMENT := n + DC_LIBS += dcn20 DC_LIBS += dsc DC_LIBS += dcn10 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 053084121db2..271d8e573181 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -107,12 +107,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201) ############################################################################### CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o -# prevent build errors regarding soft-float 
vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to Renoir APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -121,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ############################################################################### CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) @@ -135,12 +123,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) ############################################################################### CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) @@ -188,31 +170,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32)) -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2 -endif -endif - AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index cf1b5f354ae9..0202dc682682 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -26,10 +26,9 @@ #include "dccg.h" #include "clk_mgr_internal.h" - #include "dcn20/dcn20_clk_mgr.h" #include "rn_clk_mgr.h" - +#include "dml/dcn20/dcn20_fpu.h" #include "dce100/dce_clk_mgr.h" #include "rn_clk_mgr_vbios_smu.h" @@ -45,7 +44,6 @@ /* Constants */ -#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ #define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK below 100MHz */ /* Macros */ @@ -613,228 +611,6 @@ static struct clk_bw_params rn_bw_params = { }; -static struct wm_table ddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 7.09, - .sr_enter_plus_exit_time_us = 8.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_with_disabled_ppt = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 8.32, - .sr_enter_plus_exit_time_us = 9.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us 
= 11.72, - .sr_exit_time_us = 11.90, - .sr_enter_plus_exit_time_us = 12.80, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_1R_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 7.32, - .sr_enter_plus_exit_time_us = 8.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; @@ -914,12 +690,10 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } - } void rn_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index e4322fa5475b..2e088c5171b2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -29,6 +29,13 @@ #include "clk_mgr.h" #include "dm_pp_smu.h" +extern struct wm_table ddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_with_disabled_ppt; +extern struct wm_table ddr4_wm_table_rn; +extern struct wm_table ddr4_1R_wm_table_rn; +extern struct wm_table lpddr4_wm_table_rn; + struct rn_clk_registers { uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */ }; diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 914708cefc79..3ce0ee0d012f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -29,6 +29,7 @@ #include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" #include "dcn30/dcn30_clk_mgr.h" +#include "dml/dcn30/dcn30_fpu.h" #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" @@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl } } -static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) +static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - - /* Set A - Normal - default values*/ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher minimum clocks */ -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; - clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - - /* Set D - MALL - SR enter and exit times adjusted for MALL */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn3_fpu_build_wm_range_table(&clk_mgr->base); + DC_FP_END(); } void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index f310b0d25a07..24715ca2fa94 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -32,6 +32,9 @@ // For dcn20_update_clocks_update_dpp_dto #include "dcn20/dcn20_clk_mgr.h" +// For DML FPU code +#include "dml/dcn20/dcn20_fpu.h" + #include "vg_clk_mgr.h" #include "dcn301_smu.h" #include "reg_helper.h" @@ -526,81 +529,6 @@ static struct clk_bw_params vg_bw_params = { }; -static struct wm_table ddr4_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 6.09, - .sr_enter_plus_exit_time_us = 7.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr5_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - 
.wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - } -}; - - static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table, unsigned int voltage) { @@ -670,10 +598,9 @@ static void vg_clk_mgr_helper_populate_bw_params( /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h index 7255477307f1..75884f572989 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h @@ -29,6 +29,9 @@ struct watermarks; +extern struct wm_table ddr4_wm_table; +extern struct wm_table lpddr5_wm_table; + struct smu_watermark_set { struct watermarks *wm_set; union large_integer mc_address; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 5b87f937554d..c6785969eb1a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -42,6 +42,7 @@ #include "dcn/dcn_3_2_0_sh_mask.h" #include "dcn32/dcn32_clk_mgr.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 @@ -146,83 +147,9 @@ static void dcn32_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e cl static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - uint16_t setb_min_uclk_mhz = min_uclk_mhz; - uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; - - /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ - if (dcfclk_mhz_for_the_second_state) - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; - else - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - - if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) - setb_min_uclk_mhz = 
clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ - if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 
clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - } - /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ - /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn32_build_wm_range_table_fpu(clk_mgr); + DC_FP_END(); } void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 51c9563ad137..e42f44fc1c08 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3478,7 +3478,7 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg, CRTC_STATE_VACTIVE); - if (stream && should_use_dmub_lock(stream->link)) { + if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 858ee51f930a..9e51338441d0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -845,7 +845,7 @@ static bool discover_dp_mst_topology(struct dc_link *link, enum dc_detect_reason return link->type == dc_connection_mst_branch; } -static bool reset_cur_dp_mst_topology(struct dc_link *link) +bool reset_cur_dp_mst_topology(struct dc_link *link) { bool result = false; DC_LOGGER_INIT(link->ctx->logger); @@ -1703,7 +1703,7 @@ static bool dc_link_construct_legacy(struct dc_link *link, enc_init_data.transmitter = translate_encoder_to_transmitter(enc_init_data.encoder); link->link_enc = - link->dc->res_pool->funcs->link_enc_create(&enc_init_data); + link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data); if (!link->link_enc) { DC_ERROR("Failed to create link encoder!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index dfc74aea2852..48dad093ae8b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -7064,6 +7064,7 @@ void dp_enable_link_phy( pipes[i].clock_source->funcs->program_pix_clk( pipes[i].clock_source, &pipes[i].stream_res.pix_clk_params, + dp_get_link_encoding_format(link_settings), &pipes[i].pll_settings); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bdaad4ce4b2d..ffc0f1c0ea93 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -74,6 +74,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) { enum dce_version dc_version = DCE_VERSION_UNKNOWN; + switch (asic_id.chip_family) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -169,8 +170,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) dc_version = DCN_VERSION_3_21; break; case AMDGPU_FAMILY_GC_11_0_2: - if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) - dc_version = DCN_VERSION_3_14; + dc_version = DCN_VERSION_3_14; break; default: dc_version = DCE_VERSION_UNKNOWN; @@ -1463,6 +1463,7 @@ bool dc_add_plane_to_context( struct dc_stream_status *stream_status = NULL; struct pipe_ctx *prev_right_head = NULL; struct pipe_ctx *free_right_pipe = NULL; + struct pipe_ctx *prev_left_head = NULL; DC_LOGGER_INIT(stream->ctx->logger); for (i = 0; i < context->stream_count; i++) @@ -1514,8 +1515,16 @@ bool dc_add_plane_to_context( /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && - tail_pipe->next_odm_pipe) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && + tail_pipe->next_odm_pipe) { + + /* For ODM + window MPO, in 3 plane case, if we already have an MPO window on + * the right side, then we will invalidate a 2nd one on the right side + */ + if (head_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + dc_plane_state_release(plane_state); + return false; + } DC_LOG_SCALER("%s - ODM + window MPO(right). free_pipe:%d tail_pipe->next_odm_pipe:%d\n", __func__, @@ -1530,20 +1539,42 @@ bool dc_add_plane_to_context( * - If not, continue to use free_pipe * - If the right side already has a pipe, use that pipe instead if its available */ + + /* + * We also want to avoid the case where, with three planes ( 2 MPO videos ), we have + * both videos on the left side so one of the videos is invalidated. Then we + * move the invalidated video back to the right side. If the order of the plane + * states is such that the right MPO plane is processed first, the free pipe + * selected by the head will be the left MPO pipe. But since there was no right + * MPO pipe, it will assign the free pipe to the right MPO pipe instead and + * a pipe reallocation will occur.
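+ * Reusing the pipe a given side already owns in the current context avoids that reallocation.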
+ * Check the old context to see if the left side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the left side is already using this pipe, then pick another pipe for right + */ + + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[tail_pipe->next_odm_pipe->pipe_idx]; - if ((prev_right_head->bottom_pipe) && (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { free_right_pipe = acquire_free_pipe_for_head(context, pool, tail_pipe->next_odm_pipe); - if (free_right_pipe) { - free_pipe->stream = NULL; - memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); - memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); - free_pipe->plane_state = NULL; - free_pipe->pipe_idx = 0; - free_right_pipe->plane_state = plane_state; - free_pipe = free_right_pipe; + } else { + prev_left_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->pipe_idx]; + if ((prev_left_head->bottom_pipe) && + (free_pipe->pipe_idx == prev_left_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe); + } } + if (free_right_pipe) { + free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + free_pipe->stream_res.tg = tail_pipe->next_odm_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->next_odm_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->next_odm_pipe->stream_res.opp; @@ -1553,7 +1584,63 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe->next_odm_pipe; tail_pipe->next_odm_pipe->bottom_pipe = free_pipe; + } else if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) + && head_pipe->next_odm_pipe) { + + /* For ODM + window MPO, support the 3 plane ( 2 MPO ) case. + * Here we have a desktop ODM + left window MPO and a new MPO window appears + * on the right side only. It fails the first case, because tail_pipe is the + * left window MPO, so it has no next_odm_pipe. So in this scenario, we check + * for head_pipe->next_odm_pipe instead + */ + DC_LOG_SCALER("%s - ODM + win MPO (left) + win MPO (right). free_pipe:%d head_pipe->next_odm:%d\n", + __func__, + free_pipe->pipe_idx, + head_pipe->next_odm_pipe ? head_pipe->next_odm_pipe->pipe_idx : -1); + + /* + * We want to avoid the case where the right side already has a pipe assigned to + * it and is different from free_pipe ( which would trigger a pipe + reallocation ).
+ * Check the old context to see if the right side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the right side already has a pipe, use that pipe instead if it's available + */ + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->next_odm_pipe->pipe_idx]; + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe->next_odm_pipe); + if (free_right_pipe) { + free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + } + + free_pipe->stream_res.tg = head_pipe->next_odm_pipe->stream_res.tg; + free_pipe->stream_res.abm = head_pipe->next_odm_pipe->stream_res.abm; + free_pipe->stream_res.opp = head_pipe->next_odm_pipe->stream_res.opp; + free_pipe->stream_res.stream_enc = head_pipe->next_odm_pipe->stream_res.stream_enc; + free_pipe->stream_res.audio = head_pipe->next_odm_pipe->stream_res.audio; + free_pipe->clock_source = head_pipe->next_odm_pipe->clock_source; + + free_pipe->top_pipe = head_pipe->next_odm_pipe; + head_pipe->next_odm_pipe->bottom_pipe = free_pipe; } else { + + /* For ODM + window MPO, in 3 plane case, if we already have an MPO window on + * the left side, then we will invalidate a 2nd one on the left side + */ + if (head_pipe->next_odm_pipe && tail_pipe->top_pipe) { + dc_plane_state_release(plane_state); + return false; + } + free_pipe->stream_res.tg = tail_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->stream_res.opp; @@ -1564,21 +1651,28 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe; tail_pipe->bottom_pipe = free_pipe; - if (!free_pipe->next_odm_pipe && tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { - free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; - tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; - } - if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { - free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; - tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; - } + /* Connect MPO pipes together if MPO window is in the centre */ + if (!(free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + free_pipe->stream->src.width/2))) { + if (!free_pipe->next_odm_pipe && + tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; + tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; + } + if (!free_pipe->prev_odm_pipe && + tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { + free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; + tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + } } } } /* ODM + window MPO, where MPO window is on left half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= - free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x +
free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(left). free_pipe:%d\n", __func__, free_pipe->pipe_idx); @@ -1586,7 +1680,7 @@ bool dc_add_plane_to_context( } /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(right). free_pipe:%d\n", __func__, free_pipe->pipe_idx); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6752ca44e6e0..f62d50901d92 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,6 +328,11 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; + + if (attributes->height * attributes->width * 4 > 16384) + if (stream->mall_stream_config.type == SUBVP_MAIN) + return false; + stream->cursor_attributes = *attributes; dc_z10_restore(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c42377f0aae..8e1e40083ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.194" +#define DC_VER "3.2.196" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -683,7 +683,6 @@ struct dc_debug_options { bool hdmi20_disable; bool skip_detection_link_training; uint32_t edid_read_retry_times; - bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 39b426d04037..2d61c2a91cee 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -270,6 +270,23 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); } +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) +{ + union dmub_rb_cmd cmd = { 0 }; + + cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + // TODO: Uncomment once FW headers are promoted + //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; + cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; + + cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); + + // Send the command to the DMCUB. 
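+ // (queue the command in the ring buffer, kick off execution, then block until the DMCUB is idle)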
+ dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv); + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); +} + static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream) { uint8_t pipes = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 1d124a2695d5..159782cd6659 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -74,6 +74,7 @@ void dc_dmub_trace_event_control(struct dc *dc, bool enable); void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, uint32_t vtotal_max); +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst); bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool enable_pstate, struct dc_state *context); void dc_dmub_srv_query_caps_cmd(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 023774b94da3..a0af0f6afeef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -514,4 +514,7 @@ bool dc_dp_trace_is_logged(struct dc_link *link, struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link, bool in_detection); unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); + +/* Destruct the mst topology of the link and reset the allocated payload table */ +bool reset_cur_dp_mst_topology(struct dc_link *link); #endif /* DC_LINK_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5cc7cc0b2f2d..213de8cabfad 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -838,6 +838,7 @@ static void dce112_program_pixel_clk_resync( static bool dce110_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -911,6 +912,7 @@ static bool dce110_program_pix_clk( static bool dce112_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -970,6 +972,7 @@ static bool dce112_program_pix_clk( static bool dcn31_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -993,9 +996,14 @@ static bool dcn31_program_pix_clk( #if defined(CONFIG_DRM_AMD_DC_DCN) /* Enable DTO */ if (clk_src->cs_mask->PIPE0_DTO_SRC_SEL) - REG_UPDATE_2(PIXEL_RATE_CNTL[inst], - DP_DTO0_ENABLE, 1, - PIPE0_DTO_SRC_SEL, 1); + if (encoding == DP_128b_132b_ENCODING) + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 2); + else + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 1); else REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); @@ -1198,12 +1206,13 @@ const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb( static bool dcn20_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { 
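+ /* program the PLL via the DCE112 path, then optionally align vblank timing across pipes */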
struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0; - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); if (clock_source->ctx->dc->hwss.enable_vblanks_synchronization && clock_source->ctx->dc->config.vblank_alignment_max_frame_time_diff > 0) { @@ -1243,6 +1252,7 @@ static const struct clock_source_funcs dcn20_clk_src_funcs = { static bool dcn3_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -1265,7 +1275,7 @@ static bool dcn3_program_pix_clk( REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); } else // For other signal types(HDMI_TYPE_A, DVI) Driver still to call VBIOS Command table - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); return true; } @@ -1276,9 +1286,7 @@ static uint32_t dcn3_get_pix_clk_dividers( struct pll_settings *pll_settings) { unsigned long long actual_pix_clk_100Hz = pix_clk_params ? pix_clk_params->requested_pix_clk_100hz : 0; - struct dce110_clk_src *clk_src; - clk_src = TO_DCE110_CLK_SRC(cs); DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 9ad8ad4550d9..54805802cbd5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -612,6 +612,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce100_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e69c942c8345..38a67051d470 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1435,6 +1435,7 @@ static enum dc_status dce110_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 41804059550f..f808315b2835 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -660,6 +660,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce110_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 62da6bc3094d..e179e80667d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -618,6 +618,7 @@ static const struct 
encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce112_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index c4353a03b48a..1b70b78e2fa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -697,6 +697,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce120_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c index dcfa0a3efa00..fc6aa098bda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c @@ -710,6 +710,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce60_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 0c3695e79652..b28025960050 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -713,6 +713,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce80_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7a3812604e4b..bed783747f16 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -892,6 +892,7 @@ enum dc_status dcn10_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 11019c2c62cc..769974375b4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -49,6 +49,11 @@ void mpc1_set_bg_color(struct mpc *mpc, /* find bottommost mpcc. 
*/ while (bottommost_mpcc->mpcc_bot) { + /* avoid circular linked link */ + ASSERT(bottommost_mpcc != bottommost_mpcc->mpcc_bot); + if (bottommost_mpcc == bottommost_mpcc->mpcc_bot) + break; + bottommost_mpcc = bottommost_mpcc->mpcc_bot; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index e75be799012e..174eebbe8b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -740,6 +740,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn10_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn10_link_encoder *enc10 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index c99c6fababa9..484e7cdf00b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -59,7 +59,6 @@ void enc1_update_generic_info_packet( uint32_t packet_index, const struct dc_info_packet *info_packet) { - uint32_t regval; /* TODOFPGA Figure out a proper number for max_retries polling for lock * use 50 for now. */ @@ -88,7 +87,6 @@ void enc1_update_generic_info_packet( REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); /* choose which generic packet to use */ - regval = REG_READ(AFMT_VBI_PACKET_CONTROL); REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, packet_index); @@ -259,8 +257,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint32_t h_back_porch; uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; - uint8_t dynamic_range_rgb = 0; /*full range*/ - uint8_t dynamic_range_ycbcr = 1; /*bt709*/ uint8_t dp_pixel_encoding = 0; uint8_t dp_component_depth = 0; @@ -372,18 +368,15 @@ void enc1_stream_encoder_dp_set_stream_attribute( switch (output_color_space) { case COLOR_SPACE_SRGB: misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 0; /*full range*/ break; case COLOR_SPACE_SRGB_LIMITED: misc0 = misc0 | 0x8; /* bit3=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 1; /*limited range*/ break; case COLOR_SPACE_YCBCR601: case COLOR_SPACE_YCBCR601_LIMITED: misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 0; /*bt601*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) @@ -393,15 +386,12 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_YCBCR709_LIMITED: misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 1; /*bt709*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: - dynamic_range_rgb = 1; /*limited range*/ - break; case COLOR_SPACE_2020_RGB_FULLRANGE: case COLOR_SPACE_2020_YCBCR: case COLOR_SPACE_XR_RGB: diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 3b26962637d0..3e44b7998429 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -700,6 +700,7 
@@ enum dc_status dcn20_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1483de85a524..8224b9bf01d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -925,6 +925,7 @@ static const struct encoder_feature_support link_enc_feature = { }; struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1268,7 +1269,6 @@ static void get_pixel_clock_parameters( pixel_clk_params->requested_pix_clk_100hz /= 4; else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2) pixel_clk_params->requested_pix_clk_100hz /= 2; - else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) { if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx)) pixel_clk_params->requested_pix_clk_100hz /= 2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 7cbe1e9daa36..da0241e8c255 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -50,6 +50,7 @@ struct resource_pool *dcn20_create_resource_pool( struct dc *dc); struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data); unsigned int dcn20_calc_max_scaled_time( diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index 96cbd4ccd344..5c9ce2cebb0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -6,30 +6,6 @@ DCN201 = dcn201_init.o dcn201_resource.o dcn201_hwseq.o \ dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
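The Makefile deletions here, and the matching ones for dcn30, dcn302 and dcn32 further down, all serve the same refactor: floating-point DML code moves under dc/dml, which is the only place compiled with the hard-float flags, and callers outside it reach the math only between the FP guards. A conceptual model of those guards, assuming the usual x86 backing (the real helpers live in the DC FPU support code and also handle nesting and non-x86 architectures):

/* sketch only: what DC_FP_START()/DC_FP_END() boil down to on x86 */
#define DC_FP_START() kernel_fpu_begin() /* save task FPU state; FP/SIMD now legal */
#define DC_FP_END()   kernel_fpu_end()   /* restore; FP use is again forbidden */

This is why hunks throughout the patch replace open-coded double math with *_fpu() calls bracketed by DC_FP_START()/DC_FP_END().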
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -msse2 -endif -endif AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN201) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0bb7d3dd53fa..407d995bfa99 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -788,6 +788,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn201_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1036,6 +1037,14 @@ static bool dcn201_get_dcc_compression_cap(const struct dc *dc, output); } +static void dcn201_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn201_populate_dml_writeback_from_context_fpu(dc, res_ctx, pipes); + DC_FP_END(); +} static void dcn201_destroy_resource_pool(struct resource_pool **pool) { @@ -1067,8 +1076,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .add_dsc_to_stream_resource = NULL, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .acquire_idle_pipe_for_layer = dcn201_acquire_idle_pipe_for_layer, + .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d95875952fba..7cb35bb1c0f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1325,6 +1325,7 @@ static int map_transmitter_id_to_phy_instance( } static struct link_encoder *dcn21_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn21_link_encoder *enc21 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index c20331eb62e0..b7c2ae9ddfda 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -30,36 +30,6 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ dcn30_dpp_cm.o dcn30_dwb_cm.o dcn30_cm_common.o dcn30_mmhubbub.o \ dcn30_dio_link_encoder.o dcn30_resource.o - -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
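Nearly every resource file in this patch carries the same one-line change: the link-encoder constructor now receives the DC context explicitly. The create hook changes shape accordingly (a sketch of the affected function pointer; its exact home in the resource tables is assumed here, and the motivation is presumably to give constructors the context already at allocation time rather than fishing it out of the init data later):

/* before */
struct link_encoder *(*link_enc_create)(const struct encoder_init_data *init);

/* after: every dce/dcn implementation in this patch gains the ctx parameter */
struct link_encoder *(*link_enc_create)(struct dc_context *ctx,
                                        const struct encoder_init_data *init);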
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 -endif -endif - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 7f01463942fb..64320e0ca446 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -927,6 +927,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn30_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1521,26 +1522,11 @@ static bool init_soc_bounding_box(struct dc *dc, loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; loaded_ip->max_num_dpp = pool->base.pipe_count; loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - DC_FP_START(); dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_START(); + patch_dcn30_soc_bounding_box(dc, &dcn3_0_soc); DC_FP_END(); - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - return true; } @@ -2031,44 +2017,6 @@ void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; } -/* - * Finds dummy_latency_index when MCLK switching using firmware based - * vblank stretch is enabled. This function will iterate through the - * table of dummy pstate latencies until the lowest value that allows - * dm_allow_self_refresh_and_mclk_switch to happen is found - */ -int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel) -{ - const int max_latency_table_entries = 4; - int dummy_latency_index = 0; - - while (dummy_latency_index < max_latency_table_entries) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); - - if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == - dm_allow_self_refresh_and_mclk_switch) - break; - - dummy_latency_index++; - } - - if (dummy_latency_index == max_latency_table_entries) { - ASSERT(dummy_latency_index != max_latency_table_entries); - /* If the execution gets here, it means dummy p_states are - * not possible. This should never happen and would mean - * something is severely wrong. - * Here we reset dummy_latency_index to 3, because it is - * better to have underflows than system crashes. 
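dcn30_find_dummy_latency_index_for_fw_based_mclk_switch() is not being dropped here: the dcn30_resource.h hunk just below re-exports it, and the body moves into the DML/FPU translation unit where its double-typed latency math belongs. The search it performs, restated from the removed body in abbreviated form:

/* sketch: try each dummy p-state latency until DML reports that
 * self-refresh plus an MCLK switch would be allowed */
int idx = 0;

while (idx < max_latency_table_entries) {
    context->bw_ctx.dml.soc.dram_clock_change_latency_us =
        dc->clk_mgr->bw_params->dummy_pstate_table[idx].dummy_pstate_latency_us;
    dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);

    if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
        dm_allow_self_refresh_and_mclk_switch)
        break;
    idx++;
}
/* idx == 4 (== max_latency_table_entries) means no dummy p-state fits;
 * the code clamps to the last entry rather than failing outright */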
- */ - dummy_latency_index = 3; - } - - return dummy_latency_index; -} - void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { DC_FP_START(); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index 3330a1026fa5..7d063c7d6a4b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -99,6 +99,9 @@ enum dc_status dcn30_add_stream_to_ctx( void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel); #endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index a5df74110284..db172677d613 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -890,6 +890,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn301_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index e4b69ad0dde5..ebd01cb467b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -7,31 +7,6 @@ DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_X86 -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
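The populate_dml_pipes_from_context() hunks for dcn31, dcn315 and dcn316 below follow the same FPU discipline: dcc_fraction_of_zs_req_luma/chroma are double fields, so even zeroing them is kept inside the hard-float-compiled DML objects. A sketch of the helper those hunks call, with the name as used in the diff and the assert assumed from the DC FPU support code:

/* dml/dcn31 FPU file (sketch): the double stores live behind the guard */
void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
                                  int pipe_cnt)
{
    dc_assert_fp_enabled(); /* caller must hold DC_FP_START() */

    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
}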
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -msse2 -endif -endif - AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index f537888f4fa6..4fab537e822f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -891,7 +891,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(4) }; -static struct link_encoder *dcn302_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn302_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 76f863eb86ef..0a67f8a5656d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -819,7 +819,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(1) }; -static struct link_encoder *dcn303_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn303_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 799a383a2684..7f34418e6308 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -158,9 +158,11 @@ static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst) } } -void dccg31_set_dpstreamclk(struct dccg *dccg, - enum streamclk_source src, - int otg_inst) +void dccg31_set_dpstreamclk( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) { if (src == REFCLK) dccg31_disable_dpstreamclk(dccg, otg_inst); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h index 32b5593b1460..0902ce5eb8a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h @@ -161,11 +161,6 @@ struct dccg *dccg31_create( void dccg31_init(struct dccg *dccg); -void dccg31_set_dpstreamclk( - struct dccg *dccg, - enum streamclk_source src, - int otg_inst); - void dccg31_enable_symclk32_se( struct dccg *dccg, int hpo_se_inst, @@ -207,7 +202,8 @@ void dccg31_get_dccg_ref_freq( void dccg31_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void dccg31_set_dtbclk_dto( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index c4304f25ce95..2f7404a97479 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -141,7 +141,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } -static bool optc31_immediate_disable_crtc(struct timing_generator *optc) +bool 
optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index 3706e6f7880e..30b81a448ce2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -258,6 +258,8 @@ void dcn31_timing_generator_init(struct optc *optc1); +bool optc31_immediate_disable_crtc(struct timing_generator *optc); + void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); void optc3_init_odm(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 5e924d0389cc..468a893ff785 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1093,6 +1093,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1663,11 +1664,12 @@ int dcn31_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; @@ -1716,15 +1718,6 @@ int dcn31_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; - } -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1871,8 +1864,6 @@ static bool dcn31_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn31; @@ -2183,13 +2174,9 @@ static bool dcn31_resource_construct( dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; - DC_FP_END(); - return true; create_fail: - - DC_FP_END(); dcn31_resource_destruct(pool); return false; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 393458015d6a..41f8ec99da6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -59,7 +59,6 @@ dcn31_set_mcif_arb_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool 
*dcn31_create_resource_pool( const struct dc_init_data *init_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index ea78da9c6f8b..232cc15979dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -184,7 +184,8 @@ void dccg314_set_dtbclk_dto( void dccg314_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -192,26 +193,26 @@ void dccg314_set_dpstreamclk( dccg314_set_dtbclk_p_src(dccg, src, otg_inst); /* enabled to select one of the DTBCLKs for pipe */ - switch (otg_inst) { + switch (dp_hpo_inst) { case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK0_SRC_SEL, 0); + DPSTREAMCLK0_SRC_SEL, otg_inst); break; case 1: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK1_SRC_SEL, 1); + DPSTREAMCLK1_SRC_SEL, otg_inst); break; case 2: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK2_SRC_SEL, 2); + DPSTREAMCLK2_SRC_SEL, otg_inst); break; case 3: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK3_SRC_SEL, 3); + DPSTREAMCLK3_SRC_SEL, otg_inst); break; default: BREAK_TO_DEBUGGER(); @@ -219,6 +220,21 @@ void dccg314_set_dpstreamclk( } } +void dccg314_set_valid_pixel_rate( + struct dccg *dccg, + int ref_dtbclk_khz, + int otg_inst, + int pixclk_khz) +{ + struct dtbclk_dto_params dto_params = {0}; + + dto_params.ref_dtbclk_khz = ref_dtbclk_khz; + dto_params.otg_inst = otg_inst; + dto_params.pixclk_khz = pixclk_khz; + + dccg314_set_dtbclk_dto(dccg, &dto_params); +} + static const struct dccg_funcs dccg314_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, @@ -237,6 +253,8 @@ static const struct dccg_funcs dccg314_funcs = { .set_dispclk_change_mode = dccg31_set_dispclk_change_mode, .disable_dsc = dccg31_disable_dscclk, .enable_dsc = dccg31_enable_dscclk, + .set_pixel_rate_div = dccg314_set_pixel_rate_div, + .set_valid_pixel_rate = dccg314_set_valid_pixel_rate, }; struct dccg *dccg314_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index 99ba597bf9b7..9a4a9efc0203 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -147,7 +147,8 @@ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\ - DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh) + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\ + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh) struct dccg *dccg314_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index 2dbfa1c234dd..b384f30395d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -50,6 +50,26 @@ enc1->base.ctx +static void enc314_enable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON */ + REG_WAIT(DIG_FE_CNTL, 
DIG_SYMCLK_FE_ON, 1, 10, 5000); + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); +} + +static void enc314_disable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0, + DIG_FIFO_READ_START_LEVEL, 0); +} static void enc314_dp_set_odm_combine( struct stream_encoder *enc, @@ -92,7 +112,7 @@ void enc314_stream_encoder_dvi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB); @@ -132,7 +152,7 @@ static void enc314_stream_encoder_hdmi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } /* Configure pixel encoding */ @@ -302,16 +322,8 @@ static void enc314_stream_encoder_dp_unblank( REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); - /* - * DIG Resync FIFO now needs to be explicitly enabled. - * TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON - */ - REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); + /* DIG Resync FIFO now needs to be explicitly enabled. */ + enc314_enable_fifo(enc); /* wait 100us for DIG/DP logic to prime * (i.e. 
a few video lines) @@ -420,6 +432,8 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = { .set_dynamic_metadata = enc2_set_dynamic_metadata, .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute, + .enable_fifo = enc314_enable_fifo, + .disable_fifo = enc314_disable_fifo, .set_input_mode = enc314_set_dig_input_mode, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 90ec76487264..755c715ad8dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -338,3 +338,39 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } + +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int odm_combine_factor = 0; + + odm_combine_factor = get_odm_config(pipe_ctx, NULL); + + if (is_dp_128b_132b_signal(pipe_ctx)) { + *k2_div = PIXEL_RATE_DIV_BY_1; + } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { + *k1_div = PIXEL_RATE_DIV_BY_1; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + *k2_div = PIXEL_RATE_DIV_BY_2; + else + *k2_div = PIXEL_RATE_DIV_BY_4; + } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + *k1_div = PIXEL_RATE_DIV_BY_1; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + *k1_div = PIXEL_RATE_DIV_BY_2; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else { + if (odm_combine_factor == 1) + *k2_div = PIXEL_RATE_DIV_BY_4; + else if (odm_combine_factor == 2) + *k2_div = PIXEL_RATE_DIV_BY_2; + } + } + + if ((*k1_div == PIXEL_RATE_DIV_NA) && (*k2_div == PIXEL_RATE_DIV_NA)) + ASSERT(false); + + return odm_combine_factor; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index dfdd0b792a52..be0f5e4d48e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -37,4 +37,6 @@ void dcn314_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool po void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index c87b1979b2cc..b9debeb081fd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -144,6 +144,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .set_blend_lut = dcn30_set_blend_lut, .set_shaper_3dlut = dcn20_set_shaper_3dlut, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, }; void dcn314_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 436c3545a983..0c7980266b85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -160,6 
+160,37 @@ void optc314_phantom_crtc_post_enable(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); } +static void optc314_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 1 : 0); +} + static struct timing_generator_funcs dcn314_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -170,6 +201,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .program_global_sync = optc1_program_global_sync, .enable_crtc = optc314_enable_crtc, .disable_crtc = optc314_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, .phantom_crtc_post_enable = optc314_phantom_crtc_post_enable, /* used by enable_timing_synchronization. Not need for FPGA */ .is_counter_moving = optc1_is_counter_moving, @@ -204,7 +236,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .setup_global_swap_lock = NULL, .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, + .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, @@ -221,6 +253,9 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, .init_odm = optc3_init_odm, + .set_odm_bypass = optc314_set_odm_bypass, + .set_odm_combine = optc314_set_odm_combine, + .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, }; void dcn314_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index ba371769dc3e..63861cdfb09f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -69,6 +69,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn314/dcn314_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -1261,6 +1262,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1781,7 +1783,7 @@ static struct clock_source *dcn31_clock_source_create( if (!clk_src) return NULL; - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { clk_src->base.dp_clk_src = dp_clk_src; return &clk_src->base; @@ -1915,8 +1917,11 @@ static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dcn3_14_ip.max_num_otg = 
dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_14_soc.num_chans = bw_params->num_channels; + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); ASSERT(clk_table->num_entries); /* Prepass to find max clocks independent of voltage level. */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index df2abd8fe2eb..7463b12ae4a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1089,6 +1089,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1658,11 +1659,12 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 070fe10a004e..d56a212e065c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -1088,6 +1088,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1661,11 +1662,12 @@ static int dcn316_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 932d85fa4262..e943b643ab6b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -15,34 +15,6 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o dcn32_init.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ dcn32_resource_helpers.o dcn32_mpc.o -ifdef CONFIG_X86 -dcn32_ccflags := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -dcn32_ccflags := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
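The dccg set_dpstreamclk callback (already widened for dcn31 above, and for dcn314 and dcn32 in the hunks around this point) now takes the HPO stream-encoder instance separately from the OTG instance: dp_hpo_inst selects which DPSTREAMCLK output gets programmed, while otg_inst becomes the SRC_SEL value, so any HPO encoder can be clocked from any OTG instead of being tied 1:1. One case of the dcn314 switch, per the hunk:

/* sketch: drive DPSTREAMCLK<dp_hpo_inst> from OTG<otg_inst> */
switch (dp_hpo_inst) {
case 0:
    REG_UPDATE_2(DPSTREAMCLK_CNTL,
                 DPSTREAMCLK0_EN, (src == REFCLK) ? 0 : 1,
                 DPSTREAMCLK0_SRC_SEL, otg_inst);
    break;
/* cases 1..3 differ only in the field names */
default:
    BREAK_TO_DEBUGGER();
}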
-dcn32_ccflags += -mpreferred-stack-boundary=4 -else -dcn32_ccflags += -msse2 -endif -endif - -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource_helpers.o := $(dcn32_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := $(dcn32_ccflags) - AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN32) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index 12fc3afd9acd..a31c64b50410 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -211,7 +211,8 @@ static void dccg32_get_dccg_ref_freq(struct dccg *dccg, void dccg32_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 0a7d64306481..6ec1c52535b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -47,13 +47,13 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow DATA_UCLK_PSTATE_FORCE_VALUE, 0); } -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel) +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); // Also cache cursor in MALL if using MALL for SS REG_UPDATE_2(DCHUBP_MALL_CONFIG, USE_MALL_SEL, mall_sel, - USE_MALL_FOR_CURSOR, mall_sel == 2 ? 1 : 0); + USE_MALL_FOR_CURSOR, c_cursor); } void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable) @@ -94,6 +94,44 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp) } } +void hubp32_cursor_set_attributes( + struct hubp *hubp, + const struct dc_cursor_attributes *attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch); + enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( + attr->width, attr->color_format); + + hubp->curs_attr = *attr; + + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + + if (attr->width * attr->height * 4 > 16384) + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); + else + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); +} + static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -106,7 +144,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, - .set_cursor_attributes = hubp2_cursor_set_attributes, + .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, .hubp_clk_cntl = hubp2_clk_cntl, .hubp_vtg_sel = 
hubp2_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index 00b4211389c2..56ef71151536 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -52,12 +52,15 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow); -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel); +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); void hubp32_phantom_hubp_post_enable(struct hubp *hubp); +void hubp32_cursor_set_attributes(struct hubp *hubp, + const struct dc_cursor_attributes *attr); + bool hubp32_construct( struct dcn20_hubp *hubp2, struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index b6bada383958..be2e3b9e971e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -424,7 +424,6 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, unsigned int i = 0; bool subvp_immediate_flip = false; bool subvp_in_use = false; - bool drr_pipe = false; struct pipe_ctx *pipe; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -440,12 +439,10 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN && top_pipe_to_program->plane_state->flip_immediate) subvp_immediate_flip = true; - else if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_NONE && - top_pipe_to_program->stream->ignore_msa_timing_param) - drr_pipe = true; } - if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip || drr_pipe)) || (!subvp_in_use && subvp_prev_use)) { + // Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared. + if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip)) || (!subvp_in_use && subvp_prev_use)) { union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; if (!lock) { @@ -673,18 +670,23 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) { int i; unsigned int num_ways = dcn32_calculate_cab_allocation(dc, context); + bool cache_cursor = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { + if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384) + cache_cursor = true; + if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - hubp->funcs->hubp_update_mall_sel(hubp, 1); + hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { hubp->funcs->hubp_update_mall_sel(hubp, num_ways <= dc->caps.cache_num_ways && - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0); + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 
2 : 0, + cache_cursor); } } } @@ -1082,8 +1084,13 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; struct dc *dc = pipe_ctx->stream->ctx->dc; - bool two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); + bool two_pix_per_container = false; + // For phantom pipes, use the same programming as the main pipes + if (pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { + stream = pipe_ctx->stream->mall_stream_config.paired_stream; + } + two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (is_dp_128b_132b_signal(pipe_ctx)) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 00ff21458a53..992e56c6907e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -26,9 +26,11 @@ #include "dcn32_optc.h" #include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" #include "reg_helper.h" #include "dc.h" #include "dcn_calc_math.h" +#include "dc_dmub_srv.h" #define REG(reg)\ optc1->tg_regs->reg @@ -188,6 +190,65 @@ static void optc32_set_odm_bypass(struct timing_generator *optc, optc1->opp_count = 1; } +void optc32_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) + dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); + else { + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to it work with manual trigger we need to make sure + * we program the correct bit. 
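optc32_setup_manual_trigger(), being added here, is the consumer of the dc_dmub_srv_set_drr_manual_trigger_cmd() helper declared at the top of this patch: when the firmware supports FAMS MCLK switching (dc->caps.dmub_caps.mclk_sw) and it is not disabled via debug flag, DRR trigger programming is handed to DMUB rather than written to the OTG registers directly. The hand-off follows the queue/execute/wait pattern visible in the dc_dmub_srv.c hunk; in the sketch below only those three calls are confirmed by the diff, while the command-struct field names are assumed from the neighbouring DRR update helper:

/* hypothetical shape of the helper (field names assumed) */
void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst)
{
    union dmub_rb_cmd cmd = {0};

    cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
    cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER;
    cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst;

    dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
    dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
    dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
}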
+ */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + } +} + +void optc32_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + optc32_setup_manual_trigger(optc); + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } +} static struct timing_generator_funcs dcn32_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -221,7 +282,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, .set_vrr_m_const = optc3_set_vrr_m_const, - .set_drr = optc1_set_drr, + .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, .set_static_screen_control = optc1_set_static_screen_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index f913daabcca5..9a26d24b579f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -88,6 +88,7 @@ #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 #define DCN_BASE__INST0_SEG2 0x000034C0 @@ -114,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DC_LOGGER_INIT(logger) -#define DCN3_2_DEFAULT_DET_SIZE 256 -#define DCN3_2_MAX_DET_SIZE 1152 -#define DCN3_2_MIN_DET_SIZE 128 -#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 - -struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = 
true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum 
dcn32_clk_src_array_id { DCN32_CLK_SRC_PLL0, DCN32_CLK_SRC_PLL1, @@ -312,6 +182,7 @@ enum dcn32_clk_src_array_id { .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ regBIF_BX0_ ## reg_name +#undef CTX #define CTX ctx #define REG(reg_name) \ (DCN_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -972,8 +843,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .disable_idle_power_optimizations = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -998,6 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = true, + .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, }; @@ -1247,6 +1118,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn32_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = @@ -1754,171 +1626,6 @@ bool dcn32_release_post_bldn_3dlut( return ret; } -/** - ******************************************************************************************** - * dcn32_get_num_free_pipes: Calculate number of free pipes - * - * This function assumes that a "used" pipe is a pipe that has - * both a stream and a plane assigned to it. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: Number of free pipes available in the context - * - ******************************************************************************************** - */ -static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) -{ - unsigned int i; - unsigned int free_pipes = 0; - unsigned int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && !pipe->top_pipe) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - } - } - - free_pipes = dc->res_pool->pipe_count - num_pipes; - return free_pipes; -} - -/** - ******************************************************************************************** - * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. - * - * We enter this function if we are Sub-VP capable (i.e. enough pipes available) - * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if - * we are forcing SubVP P-State switching on the current config. - * - * The number of pipes used for the chosen surface must be less than or equal to the - * number of free pipes available. - * - * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). - * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own - * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't - * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [out] index: dc pipe index for the pipe chosen to have phantom pipes assigned - * - * @return: True if a valid pipe assignment was found for Sub-VP. Otherwise false. 
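Because this block is being removed from dcn32_resource.c, the pipe-accounting rule the kernel-doc above describes is easy to lose in the diff. Below is a minimal stand-alone sketch of that rule under reduced stand-in types; the toy_* names are hypothetical, and the real code walks struct pipe_ctx inside a dc_state rather than a flat array:

struct toy_pipe {
	void *stream;			/* non-NULL when a stream is assigned */
	struct toy_pipe *top_pipe;	/* NULL for the head of a split chain */
	struct toy_pipe *bottom_pipe;	/* next pipe in the split chain */
};

static unsigned int toy_get_num_free_pipes(const struct toy_pipe *pipes,
					   unsigned int pipe_count)
{
	unsigned int i, used = 0;

	for (i = 0; i < pipe_count; i++) {
		const struct toy_pipe *pipe = &pipes[i];

		/* start from chain heads only, so split pipes are not double counted */
		if (pipe->stream && !pipe->top_pipe) {
			while (pipe) {
				used++;
				pipe = pipe->bottom_pipe;
			}
		}
	}
	return pipe_count - used;
}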
- * - ******************************************************************************************** - */ - -static bool dcn32_assign_subvp_pipe(struct dc *dc, - struct dc_state *context, - unsigned int *index) -{ - unsigned int i, pipe_idx; - unsigned int max_frame_time = 0; - bool valid_assignment_found = false; - unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); - bool current_assignment_freesync = false; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - unsigned int num_pipes = 0; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - - pipe = &context->res_ctx.pipe_ctx[i]; - if (num_pipes <= free_pipes) { - struct dc_stream_state *stream = pipe->stream; - unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / - (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; - if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { - *index = i; - max_frame_time = frame_us; - valid_assignment_found = true; - current_assignment_freesync = false; - /* For the 2-Freesync display case, still choose the one with the - * longest frame time - */ - } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || - (current_assignment_freesync && frame_us > max_frame_time))) { - *index = i; - valid_assignment_found = true; - current_assignment_freesync = true; - } - } - } - pipe_idx++; - } - return valid_assignment_found; -} - -/** - * *************************************************************************************** - * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. - * - * This function returns true if there are enough free pipes - * to create the required phantom pipes for any given stream - * (that does not already have phantom pipe assigned). - * - * e.g. For a 2 stream config where the first stream uses one - * pipe and the second stream uses 2 pipes (i.e. pipe split), - * this function will return true because there is 1 remaining - * pipe which can be used as the phantom pipe for the non pipe - * split pipe. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: True if there are enough free pipes to assign phantom pipes to at least one - * stream that does not already have phantom pipes assigned. Otherwise false. - * - * *************************************************************************************** - */ -static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) -{ - unsigned int i, split_cnt, free_pipes; - unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 - bool subvp_possible = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // Find the minimum pipe split count for non SubVP pipes - if (pipe->stream && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - split_cnt = 0; - while (pipe) { - split_cnt++; - pipe = pipe->bottom_pipe; - } - - if (split_cnt < min_pipe_split) - min_pipe_split = split_cnt; - } - } - - free_pipes = dcn32_get_num_free_pipes(dc, context); - - // SubVP only possible if at least one pipe is being used (i.e. 
free_pipes - // should not equal to the pipe_count) - if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) - subvp_possible = true; - - return subvp_possible; -} - static void dcn32_enable_phantom_plane(struct dc *dc, struct dc_state *context, struct dc_stream_state *phantom_stream, @@ -1960,93 +1667,6 @@ static void dcn32_enable_phantom_plane(struct dc *dc, } } -/** - * *************************************************************************************** - * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream - * - * Set timing params of the phantom stream based on calculated output from DML. - * This function first gets the DML pipe index using the DC pipe index, then - * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of - * lines required for SubVP MCLK switching and assigns to the phantom stream - * accordingly. - * - * - The number of SubVP lines calculated in DML does not take into account - * FW processing delays and required pstate allow width, so we must include - * that separately. - * - * - Set phantom backporch = vstartup of main pipe - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] ref_pipe: Main pipe for the phantom stream - * @param [in] pipes: DML pipe params - * @param [in] pipe_cnt: number of DML pipes - * @param [in] dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) - * - * @return: void - * - * *************************************************************************************** - */ -static void dcn32_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int dc_pipe_idx) -{ - unsigned int i, pipe_idx; - struct pipe_ctx *pipe; - uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; - unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; - unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; - - // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (i == dc_pipe_idx) - break; - - pipe_idx++; - } - - // Calculate lines required for pstate allow width and FW processing delays - pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us) / 1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - - // Update clks_cfg for calling into recalculate - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = socclk; - - // DML calculation for MALL region doesn't take into account FW delay - // and required pstate allow width for multi-display cases - phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; - - // For backporch of phantom pipe, use vstartup of the main pipe - phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - 
phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -2068,7 +1688,9 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + DC_FP_START(); dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); + DC_FP_END(); dc_add_stream_to_ctx(dc, context, phantom_stream); return phantom_stream; @@ -2132,791 +1754,6 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, } } -static bool dcn32_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - if (pri_pipe->plane_state) { - /* ODM + window MPO, where MPO window is on left half only */ - if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= - pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) - return true; - - /* ODM + window MPO, where MPO window is on right half only */ - if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.width/2) - return true; - } - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dcn32_find_split_pipe( - 
struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - - -/** - * *************************************************************************************** - * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable - * - * High level algorithm: - * 1. Find longest microschedule length (in us) between the two SubVP pipes - * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both - * pipes still allows for the maximum microschedule to fit in the active - * region for both pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + SubVP config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *subvp_pipes[2]; - struct dc_stream_state *phantom = NULL; - uint32_t microschedule_lines = 0; - uint32_t index = 0; - uint32_t i; - uint32_t max_microschedule_us = 0; - int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - uint32_t time_us = 0; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
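Every schedulability check in this block converts a scanline count into wall-clock time with the same expression; the one subtlety is that pix_clk_100hz stores the pixel clock in units of 100 Hz. A small helper capturing that conversion (illustrative only; the driver open-codes this arithmetic at each call site, sometimes adding +1 to round up):

static unsigned int lines_to_us(unsigned int lines, unsigned int h_total,
				unsigned int pix_clk_100hz)
{
	/* lines * h_total = pixels; pix_clk_100hz * 100 = pixels per second */
	return (unsigned int)((double)lines * h_total /
			((double)pix_clk_100hz * 100) * 1000000.0);
}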
- */ - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; - microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + - phantom->timing.v_addressable; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (microschedule_lines * phantom->timing.h_total) / - (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us + - dc->caps.subvp_fw_processing_delay_us + 1; - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - subvp_pipes[index] = pipe; - index++; - - // Maximum 2 SubVP pipes - if (index == 2) - break; - } - } - vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * - subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * - subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -/** - * *************************************************************************************** - * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe - * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching - * (the margin is equal to the MALL region + DRR margin (500us)) - * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) - * then report the configuration as supported - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] drr_pipe: DRR pipe_ctx for the SubVP + DRR config - * - * @return: bool - True if the SubVP + DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe) -{ - bool schedulable = false; - uint32_t i; - struct pipe_ctx *pipe = NULL; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *drr_timing = NULL; - int16_t prefetch_us = 0; - int16_t mall_region_us = 0; - int16_t drr_frame_us = 0; // nominal frame time - int16_t subvp_active_us = 0; - int16_t stretched_drr_us = 0; - int16_t drr_stretched_vblank_us = 0; - int16_t max_vblank_mallregion = 0; - - // Find SubVP pipe - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) - break; - } - - main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). 
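Condensed into a single predicate, the SubVP + DRR criterion spelled out above looks like the sketch below. Inputs are in microseconds, and the panel's minimum refresh rate is taken in Hz for readability where the driver uses min_refresh_in_uhz; toy_subvp_drr_ok is a hypothetical name, not DC API:

#include <stdbool.h>

static bool toy_subvp_drr_ok(int subvp_active_us, int prefetch_us,
			     int stretched_drr_us, int max_vblank_mallregion_us,
			     double min_refresh_hz)
{
	/* the stretched frame must still fit within the slowest allowed frame */
	double max_frame_us = 1000000.0 / min_refresh_hz;

	return stretched_drr_us < max_frame_us &&
	       subvp_active_us - prefetch_us - stretched_drr_us -
	       max_vblank_mallregion_us > 0;
}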
- */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - - return schedulable; -} - -/** - * *************************************************************************************** - * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe - * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) - * then report the configuration as supported - * 3. If the VBLANK display is DRR, then take the DRR static schedulability path - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *pipe = NULL; - struct pipe_ctx *subvp_pipe = NULL; - bool found = false; - bool schedulable = false; - uint32_t i = 0; - uint8_t vblank_index = 0; - uint16_t prefetch_us = 0; - uint16_t mall_region_us = 0; - uint16_t vblank_frame_us = 0; - uint16_t subvp_active_us = 0; - uint16_t vblank_blank_us = 0; - uint16_t max_vblank_mallregion = 0; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *vblank_timing = NULL; - - /* For SubVP + VBLANK/DRR cases, we assume there can only be - * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK - * is supported, it is either a single VBLANK case or two VBLANK - * displays which are synchronized (in which case they have identical - * timings). - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { - // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
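The plain SubVP + VBLANK case likewise reduces to the inequality stated in the kernel-doc above: the SubVP pipe's active region must cover the phantom prefetch, a whole frame of the VBLANK display, and the larger of its blanking time and the MALL region. A sketch with microsecond inputs (toy_subvp_vblank_ok is an illustrative name):

#include <stdbool.h>

static bool toy_subvp_vblank_ok(int subvp_active_us, int prefetch_us,
				int vblank_frame_us, int vblank_blank_us,
				int mall_region_us)
{
	/* worst-case tail: larger of VBLANK blanking time and MALL region */
	int max_tail_us = vblank_blank_us > mall_region_us ?
			vblank_blank_us : mall_region_us;

	return subvp_active_us - prefetch_us - vblank_frame_us - max_tail_us > 0;
}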
- vblank_index = i; - found = true; - } - - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_pipe = pipe; - } - // Use ignore_msa_timing_param flag to identify as DRR - if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { - // SUBVP + DRR case - schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]); - } else if (found) { - main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; - vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -/** - * ******************************************************************************************** - * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle - * static analysis based on the case. - * - * Three cases: - * 1. SubVP + SubVP - * 2. SubVP + VBLANK (DRR checked internally) - * 3. 
SubVP + VACTIVE (currently unsupported) - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] vlevel: Voltage level calculated by DML - * - * @return: bool - True if statically schedulable, false otherwise - * - * ******************************************************************************************** - */ -static bool subvp_validate_static_schedulability(struct dc *dc, - struct dc_state *context, - int vlevel) -{ - bool schedulable = true; // true by default for single display case - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - uint32_t i, pipe_idx; - uint8_t subvp_count = 0; - uint8_t vactive_count = 0; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_count++; - - // Count how many planes are capable of VACTIVE switching (SubVP + VACTIVE unsupported) - if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0) { - vactive_count++; - } - pipe_idx++; - } - - if (subvp_count == 2) { - // Static schedulability check for SubVP + SubVP case - schedulable = subvp_subvp_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { - // Static schedulability check for SubVP + VBLANK case. Also handle the case where - // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) - if (vactive_count > 0) - schedulable = false; - else - schedulable = subvp_vblank_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp) { - // SubVP + VACTIVE currently unsupported - schedulable = false; - } - return schedulable; -} - -static void dcn32_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - unsigned int dc_pipe_idx = 0; - bool found_supported_config = false; - struct pipe_ctx *pipe = NULL; - uint32_t non_subvp_pipes = 0; - bool drr_pipe_found = false; - uint32_t drr_pipe_index = 0; - uint32_t i = 0; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_uclk_fclk_and_stutter; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. 
(Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dcn32_merge_pipes_for_subvp(dc, context); - - while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && - dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { - - /* For the case where *vlevel = num_states, bandwidth validation has failed for this config. - * Adding phantom pipes won't change the validation result, so change the DML input param - * for P-State support before adding phantom pipes and recalculating the DML result. - * However, this case is only applicable for SubVP + DRR cases because the prefetch mode - * will not allow for switch in VBLANK. The DRR display must have its VBLANK stretched - * enough to support MCLK switching. - */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - /* There are params (such as FabricClock) that need to be recalculated - * after validation fails (otherwise it will be 0). Calculation for - * phantom vactive requires call into DML, so we must ensure all the - * vba params are valid otherwise we'll get incorrect phantom vactive. - */ - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - } - - dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); - - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - - if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported - && subvp_validate_static_schedulability(dc, context, *vlevel)) { - found_supported_config = true; - } else if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles - * the case for SubVP + DRR, where the DRR display does not support MCLK switch - * at its native refresh rate / timing. - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - non_subvp_pipes++; - // Use ignore_msa_timing_param flag to identify as DRR - if (pipe->stream->ignore_msa_timing_param) { - drr_pipe_found = true; - drr_pipe_index = i; - } - } - } - // If there is only 1 remaining non SubVP pipe that is DRR, check static - // schedulability for SubVP + DRR.
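The DRR fallback that follows hinges on a simple census of the non-SubVP pipes. A stand-alone sketch of that detection under simplified stand-in types (toy_* names are hypothetical; the real loop additionally skips non-head pipes via top_pipe and requires a plane_state):

#include <stdbool.h>

struct toy_stream {
	bool ignore_msa_timing_param;	/* treated as "DRR capable", as in the driver */
	int mall_type;			/* 0 stands in for SUBVP_NONE */
};

static bool toy_single_drr_candidate(const struct toy_stream *streams, int n,
				     int *drr_index)
{
	int i, non_subvp = 0;
	bool drr_found = false;

	for (i = 0; i < n; i++) {
		if (streams[i].mall_type != 0)
			continue;	/* only count non-SubVP streams */
		non_subvp++;
		if (streams[i].ignore_msa_timing_param) {
			drr_found = true;
			*drr_index = i;
		}
	}
	/* SubVP + DRR is only attempted when exactly one pipe remains and it is DRR */
	return non_subvp == 1 && drr_found;
}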
- if (non_subvp_pipes == 1 && drr_pipe_found) { - found_supported_config = subvp_drr_schedulable(dc, - context, &context->res_ctx.pipe_ctx[drr_pipe_index]); - } - } - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - } else { - // only call dcn20_validate_apply_pipe_split_flags if we found a supported config - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - // Note: We can't apply the phantom pipes to hardware at this time. We have to wait - // until driver has acquired the DMCUB lock to do it safely. - } - } -} - -static bool dcn32_internal_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
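The validate/relax/revalidate pattern used in both dcn32_full_validate_bw_helper and dcn32_internal_validate_bw can be abstracted as below. This is a loose sketch: the real knob is soc.allow_for_pstate_or_stutter_in_vblank_final with the dm_prefetch_support_uclk_fclk_and_stutter and dm_prefetch_support_stutter values, and "no supported state" is signalled by vlevel == soc.num_states; the toy_* names are placeholders:

enum toy_prefetch_mode {
	TOY_PREFETCH_UCLK_FCLK_AND_STUTTER,	/* strict: p-state switch in VBLANK */
	TOY_PREFETCH_STUTTER_ONLY		/* relaxed fallback */
};

static int toy_validate(int (*get_vlevel)(enum toy_prefetch_mode),
			int num_states)
{
	/* first try the mode that preserves p-state support... */
	int vlevel = get_vlevel(TOY_PREFETCH_UCLK_FCLK_AND_STUTTER);

	/* ...and only relax the prefetch requirement if no state works */
	if (vlevel == num_states)
		vlevel = get_vlevel(TOY_PREFETCH_STUTTER_ONLY);
	return vlevel;
}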
- */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == 
old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - bool dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) @@ -2932,9 +1769,9 @@ bool dcn32_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); - DC_FP_START(); + DC_FP_START(); out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - DC_FP_END(); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; @@ -2984,11 +1821,13 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; + bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; + int plane_count = 0; + struct dc_crtc_timing *timing; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct 
dc_crtc_timing *timing; if (!res_ctx->pipe_ctx[i].stream) continue; @@ -3006,6 +1845,7 @@ int dcn32_populate_dml_pipes_from_context( switch (pipe->stream->mall_stream_config.type) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; break; case SUBVP_PHANTOM: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; @@ -3039,10 +1879,38 @@ int dcn32_populate_dml_pipes_from_context( } } + /* Calculate the number of planes we have so we can determine + * whether to apply ODM 2to1 policy or not + */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; + + DC_FP_START(); + is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + DC_FP_END(); + + pipe_cnt++; + } + + /* Determine whether we will apply ODM 2to1 policy + * Applies to single display and where the number of planes is less than 3 + * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes + */ + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; - if (context->stream_count == 1) { - if (dc->debug.enable_single_display_2to1_odm_policy) - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = false; + if (context->stream_count == 1 && timing->dsc_cfg.num_slices_h != 1) { + if (dc->debug.enable_single_display_2to1_odm_policy) { + if (!((plane_count > 2) && pipe->top_pipe)) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = true; } pipe_cnt++; } @@ -3051,8 +1919,7 @@ int dcn32_populate_dml_pipes_from_context( * the DET available for each pipe). Use the DET override input to maintain our driver * policy. */ - switch (pipe_cnt) { - case 1: + if (pipe_cnt == 1 && !is_pipe_split_expected[0]) { pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE; if (pipe->plane_state && !dc->debug.disable_z9_mpc) { if (!is_dual_plane(pipe->plane_state->format)) { @@ -3063,858 +1930,39 @@ int dcn32_populate_dml_pipes_from_context( pipes[0].pipe.src.det_size_override = 320; // 5K or higher } } - break; - case 2: - case 3: - case 4: - // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 pipes use default size for each pipe - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? 
(DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE; - } - break; - } - - dcn32_update_det_override_for_mpo(dc, context, pipes); - - return pipe_cnt; -} - -void dcn32_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; - } - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. - * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, 
pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: - * All clocks min. - * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set C, for Dummy P-State: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - // For Set A and Set C use values from validation - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 on DCN32, when p-state unsupported, for now keep as DCN32. - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, dc->res_pool->pipe_count); - dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + // In general cases we want to keep the dram clock change requirement + // (prefer configs that support MCLK switch). 
Only override to false + // for SubVP + if (subvp_in_use) + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; + else + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + return pipe_cnt; } static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - -void dcn32_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) +void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) { DC_FP_START(); - dcn32_calculate_wm_and_dlg_fp( - dc, context, - pipes, - pipe_cnt, - vlevel); + dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel); DC_FP_END(); } -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - int pipe_cnt, int vlevel) -{ - int i, pipe_idx; - bool usr_retraining_support = false; - - /* Writeback MCIF_WB arbitration parameters */ - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; - context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context); - /* - * - * TODO: needs FAMS - * Pstate change might not be supported by hardware, but it might be - * possible with firmware driven vertical blank stretching. 
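(Aside on the subvp_in_use override at the top of this hunk: the if/else collapses to a single boolean assignment. A sketch assuming the kernel's dc_state layout; not part of the patch itself.)

/* Keep the DML requirement that an MCLK switch be supported, except
 * under SubVP, which creates its own switch window with phantom
 * pipes. */
static void set_mclk_switch_requirement(struct dc_state *context,
					bool subvp_in_use)
{
	context->bw_ctx.dml.soc.dram_clock_change_requirement_final =
		!subvp_in_use;
}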
- */ - // context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; - if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; - else - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; - - usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - ASSERT(usr_retraining_support); - - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; - } - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz - * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz - * 1000; - - context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, - pipe_cnt, pipe_idx); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, - &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); 
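(Aside on the compbuf_size_kb accounting a few lines up: the configurable return buffer is a shared pool, so COMPBUF is simply whatever the per-pipe DET allocations leave behind. A standalone toy model; the 1280 KB figure matches config_return_buffer_size_in_kbytes elsewhere in this diff, while the DET sizes are made up.)

#include <stdio.h>

int main(void)
{
	const int crb_kb = 1280;           /* configurable return buffer */
	const int det_kb[] = { 384, 384 }; /* per-active-pipe DET */
	int compbuf_kb = crb_kb;
	size_t i;

	for (i = 0; i < sizeof(det_kb) / sizeof(det_kb[0]); i++)
		compbuf_kb -= det_kb[i];

	printf("COMPBUF: %d KB\n", compbuf_kb); /* prints 512 */
	return 0;
}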
- - pipe_idx++; - } -} - -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st 
*table, unsigned int *num_entries) -{ - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - - // Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // If FCLK is coarse grained, insert individual DPMs. 
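(Aside before the FCLK branch, which continues immediately below: get_optimal_ntuple() and calculate_net_bw_in_kbytes_sec() above balance each candidate state across three bandwidth paths, and a state's effective bandwidth is the minimum of the three after derating. A standalone sketch; the constants follow the dcn3_21_soc values that appear later in this diff (8 channels x 2 bytes, 64-byte return bus, 20/67/100 percent derates) but are illustrative here.)

/* Net bandwidth of a state = min(DRAM, fabric, SDP) after derate. */
static float net_bw_kbytes_sec(float dram_speed_mts, float fabricclk_mhz,
			       float dcfclk_mhz)
{
	float dram = dram_speed_mts * 8 * 2 * 0.20f;
	float fabric = fabricclk_mhz * 64 * 0.67f;
	float sdp = dcfclk_mhz * 64 * 1.00f;
	float bw = dram;

	if (fabric < bw)
		bw = fabric;
	if (sdp < bw)
		bw = sdp;
	return bw;
}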
- if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. - - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
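(The removed dedup sweep itself follows right below. Because insert_entry_into_table_sorted() keeps the table ordered by net bandwidth, any duplicates created by the round-up passes end up adjacent, so one linear pass is enough. The same idea as a self-contained sketch, with a toy state type:)

#include <string.h>

struct soc_state { unsigned dcfclk_mhz, fabricclk_mhz, dram_speed_mts; };

static void dedup_sorted(struct soc_state *t, unsigned *n)
{
	unsigned i = 0;

	while (*n > 1 && i < *n - 1) {
		/* no padding in this toy struct, so memcmp is safe */
		if (!memcmp(&t[i], &t[i + 1], sizeof(t[i]))) {
			memmove(&t[i + 1], &t[i + 2],
				(*n - i - 2) * sizeof(t[0]));
			(*n)--;
		} else {
			i++;
		}
	}
}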
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn32_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - - /* Overrides from dc->config options */ - dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_2_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_2_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_2_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if 
(dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - unsigned int min_dcfclk = UINT_MAX; - /* Set 199 as first value in STA target array to have a minimum DCFCLK value. - * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && - bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) - min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (min_dcfclk > dcfclk_sta_targets[0]) - dcfclk_sta_targets[0] = min_dcfclk; - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < 
bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_2_soc.num_states = num_states; - for (i = 0; i < dcn3_2_soc.num_states; i++) { - dcn3_2_soc.clock_limits[i].state = i; - dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - if (dc->current_state) - 
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - } + DC_FP_START(); + dcn32_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); } static struct resource_funcs dcn32_res_pool_funcs = { @@ -3925,7 +1973,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .validate_bandwidth = dcn32_validate_bandwidth, .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, - .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, + .acquire_idle_pipe_for_head_pipe_in_layer = dcn32_acquire_idle_pipe_for_head_pipe_in_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, @@ -3954,7 +2002,7 @@ static bool dcn32_resource_construct( uint32_t pipe_fuses = 0; uint32_t num_pipes = 4; - DC_FP_START(); + DC_FP_START(); ctx->dc_bios->regs = &bios_regs; @@ -4294,13 +2342,13 @@ static bool dcn32_resource_construct( pool->base.oem_device = NULL; } - DC_FP_END(); + DC_FP_END(); return true; create_fail: - DC_FP_END(); + DC_FP_END(); dcn32_resource_destruct(pool); @@ -4324,3 +2372,108 @@ struct resource_pool *dcn32_create_resource_pool( kfree(pool); return NULL; } + +static struct pipe_ctx *find_idle_secondary_pipe_check_mpo( + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) +{ + int i; + struct pipe_ctx *secondary_pipe = NULL; + struct pipe_ctx *next_odm_mpo_pipe = NULL; + int primary_index, preferred_pipe_idx; + struct pipe_ctx *old_primary_pipe = NULL; + + /* + * Modified from find_idle_secondary_pipe + * With windowed MPO and ODM, we want to avoid the case where we want a + * free pipe for the left side but the free pipe is being used on the + * right side. + * Add check on current_state if the primary_pipe is the left side, + * to check the right side ( primary_pipe->next_odm_pipe ) to see if + * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe ) + * - If so, then don't use this pipe + * EXCEPTION - 3 plane ( 2 MPO plane ) case + * - in this case, the primary pipe has already gotten a free pipe for the + * MPO window in the left + * - when it tries to get a free pipe for the MPO window on the right, + * it will see that it is already assigned to the right side + * ( primary_pipe->next_odm_pipe ). But in this case, we want this + * free pipe, since it will be for the right side. 
So add an + * additional condition, that skipping the free pipe on the right only + * applies if the primary pipe has no bottom pipe currently assigned + */ + if (primary_pipe) { + primary_index = primary_pipe->pipe_idx; + old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index]; + if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe) + && (!primary_pipe->bottom_pipe)) + next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe; + + preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; + if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (!secondary_pipe) + for (i = pool->pipe_count - 1; i >= 0; i--) { + if ((res_ctx->pipe_ctx[i].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) { + secondary_pipe = &res_ctx->pipe_ctx[i]; + secondary_pipe->pipe_idx = i; + break; + } + } + + return secondary_pipe; +} + +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx *head_pipe) +{ + struct resource_context *res_ctx = &state->res_ctx; + struct pipe_ctx *idle_pipe, *pipe; + struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; + int head_index; + + if (!head_pipe) + ASSERT(0); + + /* + * Modified from dcn20_acquire_idle_pipe_for_layer + * Check if head_pipe in old_context already has bottom_pipe allocated. + * - If so, check if that pipe is available in the current context. 
+ * -- If so, reuse pipe from old_context + */ + head_index = head_pipe->pipe_idx; + pipe = &old_ctx->pipe_ctx[head_index]; + if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) { + idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx]; + idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx; + } else { + idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe); + if (!idle_pipe) + return NULL; + } + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index db4546317cb5..1e7e6201c880 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -29,10 +29,14 @@ #include "core_types.h" #define DCN3_2_DET_SEG_SIZE 64 +#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_2_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; + struct dcn32_resource_pool { struct resource_pool base; }; @@ -41,12 +45,6 @@ struct resource_pool *dcn32_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); -void dcn32_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel); - struct panel_cntl *dcn32_panel_cntl_create( const struct panel_cntl_init_data *init_data); @@ -100,7 +98,15 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes); +bool dcn32_mpo_in_use(struct dc_state *context); + +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx *head_pipe); + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt); #endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e001f6d1f6c3..b3f8503cea9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -26,49 +26,7 @@ // header file of functions being implemented #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" -/** - * ******************************************************************************************** - * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx - * with those params. - * - * This function must be called AFTER the phantom pipes are added to context and run through DML - * (so that the DLG params for the phantom pipes can be populated), and BEFORE we program the - * timing for the phantom pipes. 
- * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] pipes: DML pipe params array - * @param [in] pipe_cnt: DML pipe count - * - * @return: void - * - * ******************************************************************************************** - */ -void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - uint32_t i, pipe_idx; - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) - continue; - - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; - } - pipe_idx++; - } -} +#include "dml/dcn32/display_mode_vba_util_32.h" /** * ******************************************************************************************** @@ -93,6 +51,9 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; uint32_t total_cache_lines = 0; + uint32_t bytes_in_mall = 0; + uint32_t num_mblks = 0; + uint32_t cache_lines_per_plane = 0; uint32_t i = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -103,9 +64,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; + + // For bytes required in MALL, calculate based on number of MBlks required + num_mblks = (mall_region_pixels * bytes_per_pixel + + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES; + bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES; // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment // (MALL is 64-byte aligned) - cache_lines_used += (bytes_per_pixel * mall_region_pixels) / dc->caps.cache_line_size + 2; + cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2; + + // For DCC we must cache the meta surface, so double cache lines required + if (pipe->plane_state->dcc.enable) + cache_lines_per_plane *= 2; + cache_lines_used += cache_lines_per_plane; } } @@ -195,66 +166,47 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -/* For MPO we adjust the DET allocation to ensure we have enough DET buffer when an MPO pipe - * is removed. For example for 1 MPO + 1 non-MPO normally we would allocate 6 DET segments - * for each pipe [6, 6, 6]. But when transitioning out of MPO it would change from - * [6, 6, 6] -> [9, 9]. However, if VUPDATE for the non-MPO pipe comes first we would be - * trying to allocate more DET than what's currently available which would result in underflow.
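(Aside on the MBLK-based num_ways computation above: MALL is allocated in 64 KB MBLK units, so a plane's footprint is rounded up to whole MBLKs, and with DCC enabled the requirement roughly doubles because the metadata surface must be cached as well. A sketch only; the kernel doubles cache lines rather than MBLKs, the fold here is for brevity.)

#include <stdint.h>

#define MBLK_SIZE_BYTES 65536 /* DCN3_2_MALL_MBLK_SIZE_BYTES */

static uint64_t plane_bytes_in_mall(uint32_t width, uint32_t height,
				    uint32_t bytes_per_pixel, int dcc_on)
{
	uint64_t px = (uint64_t)width * height;
	uint64_t mblks = (px * bytes_per_pixel + MBLK_SIZE_BYTES - 1) /
			 MBLK_SIZE_BYTES;

	if (dcc_on)
		mblks *= 2; /* DCC meta surface is cached as well */

	return mblks * MBLK_SIZE_BYTES;
}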
- * - * In this case we must ensure there is enough buffer when transitioning in and out of MPO: - * - * 1 MPO (2 plane) + 1 non-MPO case: - * [4, 4, 9]<->[9, 9]: Allocate 4 each for MPO pipes, and maintain 9 for non-MPO pipe - * - * 1 MPO (2 plane) + 2 non-MPO case: - * [3, 3, 5, 5]<->[6, 6, 6] - * - * 1 MPO (3 plane) + 1 non-MPO case: - * [3, 3, 3, 9]<->[4, 4, 9] or [3, 3, 3, 6]<->[9, 9] - * - * For multi-display MPO case all pipes will have 4 segments: - * Removing MPO on one of the displays will result in 3 pipes - * (1 MPO and 1 non-MPO which is covered by single MPO stream case). - */ -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes) +bool dcn32_mpo_in_use(struct dc_state *context) { - uint8_t i, mpo_stream_index, pipe_cnt; - uint8_t mpo_stream_count = 0; - uint8_t mpo_planes = 0; // Only used in single display MPO case - unsigned int j; - struct resource_context *res_ctx = &context->res_ctx; + uint32_t i; for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) { - mpo_stream_index = i; - mpo_stream_count++; - mpo_planes = context->stream_status[i].plane_count; - } + if (context->stream_status[i].plane_count > 1) + return true; } + return false; +} - if (mpo_stream_count == 1) { - for (j = 0, pipe_cnt = 0; j < dc->res_pool->pipe_count; j++) { - if (!res_ctx->pipe_ctx[j].stream) - continue; - - if (context->res_ctx.pipe_ctx[j].stream == context->streams[mpo_stream_index]) { - // For 3 plane MPO + 1 non-MPO, do [3, 3, 3, 9] - // For 2 plane MPO + 1 non-MPO, do [4, 4, 9] - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * (mpo_planes == 2 ? 4 : 3); - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 3; +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt) +{ + int i, j, count, stream_segments, pipe_segments[MAX_PIPES]; + + if (context->stream_count > 0) { + stream_segments = 18 / context->stream_count; + for (i = 0; i < context->stream_count; i++) { + count = 0; + for (j = 0; j < pipe_cnt; j++) { + if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) { + count++; + if (is_pipe_split_expected[j]) + count++; + } + } + pipe_segments[i] = stream_segments / count; + } - } else if (context->res_ctx.pipe_ctx[j].stream && - context->res_ctx.pipe_ctx[j].stream != context->streams[mpo_stream_index]) { - // Update for non-MPO pipes - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 9; - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 5; + for (i = 0; i < pipe_cnt; i++) { + pipes[i].pipe.src.det_size_override = 0; + for (j = 0; j < context->stream_count; j++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[j]) { + pipes[i].pipe.src.det_size_override = pipe_segments[j] * DCN3_2_DET_SEG_SIZE; + break; + } } - pipe_cnt++; } + } else { + for (i = 0; i < pipe_cnt; i++) + pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index e554fd6c16f2..0a199c83bb5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ 
b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -12,31 +12,6 @@ DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -msse2 -endif -endif - AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN321) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ebbeebf972dc..8157e40d2c7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -37,6 +37,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn321/dcn321_fpu.h" + #include "dcn10/dcn10_ipp.h" #include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" @@ -120,134 +122,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define fixed16_to_double(x) (((double)x) / ((double) (1 << 16))) #define fixed16_to_double_to_cpu(x) fixed16_to_double(le32_to_cpu(x)) -#define DCN3_2_DEFAULT_DET_SIZE 256 - -struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - 
.writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn321_clk_src_array_id { DCN321_CLK_SRC_PLL0, DCN321_CLK_SRC_PLL1, @@ -970,7 +844,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -995,6 +869,7 @@ static const struct dc_debug_options debug_defaults_drv = { }, .use_max_lb = true, .force_disable_subvp = true, + .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, }; @@ -1245,6 +1120,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn321_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct 
dcn20_link_encoder *enc20 = @@ -1696,524 +1572,11 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void 
insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) -{ - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - - // 
Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // If FCLK is coarse grained, insert individual DPMs. - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. - - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn321_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - /* Overrides from dc->config options */ - dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_21_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_21_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_21_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if 
(dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the 
final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_21_soc.num_states = num_states; - for (i = 0; i < dcn3_21_soc.num_states; i++) { - dcn3_21_soc.clock_limits[i].state = i; - dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - } + DC_FP_START(); + dcn321_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); } static struct resource_funcs dcn321_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h index 2732085a0e88..82cbf009f2d3 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h @@ -31,6 +31,9 @@ #define TO_DCN321_RES_POOL(pool)\ container_of(pool, struct dcn321_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_21_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc; + struct dcn321_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index c48688cdd7f7..359f6e9a1da0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -72,9 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) @@ -124,6 +126,8 @@ DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_3 DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o +DML += dcn32/dcn32_fpu.o +DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index dc60b835e938..39428488a052 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -42,6 +42,9 @@ #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) #endif +/* Constant */ +#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ + /** * DOC: DCN2x FPU manipulation Overview * @@ -650,6 +653,228 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_states = 8 }; +struct wm_table ddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 7.09, + .sr_enter_plus_exit_time_us = 8.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 5.32, + .sr_enter_plus_exit_time_us = 6.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_with_disabled_ppt = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 8.32, + .sr_enter_plus_exit_time_us = 9.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table ddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + } +}; + +struct wm_table ddr4_1R_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + 
.sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 7.32, + .sr_enter_plus_exit_time_us = 8.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + void dcn20_populate_dml_writeback_from_context(struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) @@ -2068,3 +2293,100 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } + +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; + bw_params->wm_table.entries[WM_D].wm_inst = WM_D; + bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; + bw_params->wm_table.entries[WM_D].valid = true; +} + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 
+ wb_info->dwb_params.cnv_params.crop_width : + wb_info->dwb_params.cnv_params.src_width; + dout_wb.wb_dst_width = wb_info->dwb_params.dest_width; + dout_wb.wb_dst_height = wb_info->dwb_params.dest_height; + dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps; + dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps; + dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c; + dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c; + dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_width / + (double)wb_info->dwb_params.dest_width : + (double)wb_info->dwb_params.cnv_params.src_width / + (double)wb_info->dwb_params.dest_width; + dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ? + (double)wb_info->dwb_params.cnv_params.crop_height / + (double)wb_info->dwb_params.dest_height : + (double)wb_info->dwb_params.cnv_params.src_height / + (double)wb_info->dwb_params.dest_height; + if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) { + if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC) + dout_wb.wb_pixel_format = dm_420_8; + else + dout_wb.wb_pixel_format = dm_420_10; + } else + dout_wb.wb_pixel_format = dm_444_32; + + /* Workaround for cases where multiple writebacks are connected to the same plane. + * In that case we need to compute the worst case and set the associated writeback + * parameters, because the DML computation assumes only one set of writeback + * parameters per pipe. */ + writeback_dispclk = CalculateWriteBackDISPCLK( + dout_wb.wb_pixel_format, + pipes[pipe_cnt].pipe.dest.pixel_rate_mhz, + dout_wb.wb_hratio, + dout_wb.wb_vratio, + dout_wb.wb_htaps_luma, + dout_wb.wb_vtaps_luma, + dout_wb.wb_htaps_chroma, + dout_wb.wb_vtaps_chroma, + dout_wb.wb_dst_width, + pipes[pipe_cnt].pipe.dest.htotal, + 2); + + if (writeback_dispclk > max_calc_writeback_dispclk) { + max_calc_writeback_dispclk = writeback_dispclk; + pipes[pipe_cnt].dout.wb = dout_wb; + } + } + } + + pipe_cnt++; + } + +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index aa892193e485..c51badf7b68a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -82,4 +82,10 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, bool fast_validate); void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes); + #endif /* __DCN20_FPU_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index a8db1306750e..6dd9a70314c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -29,7 +29,7 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" - +#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h" #include "display_mode_vba_30.h" #include "dcn30_fpu.h" @@ -616,4 +616,128 @@ } +/** + * Finds dummy_latency_index when MCLK switching using firmware-based + * vblank stretch is enabled.
This function will iterate through the + * table of dummy pstate latencies until the lowest value that allows + * dm_allow_self_refresh_and_mclk_switch to happen is found + */ +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + const int max_latency_table_entries = 4; + int dummy_latency_index = 0; + + dc_assert_fp_enabled(); + + while (dummy_latency_index < max_latency_table_entries) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + + if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == + dm_allow_self_refresh_and_mclk_switch) + break; + + dummy_latency_index++; + } + + if (dummy_latency_index == max_latency_table_entries) { + ASSERT(dummy_latency_index != max_latency_table_entries); + /* If the execution gets here, it means dummy p_states are + * not possible. This should never happen and would mean + * something is severely wrong. + * Here we reset dummy_latency_index to 3, because it is + * better to have underflows than system crashes. + */ + dummy_latency_index = 3; + } + + return dummy_latency_index; +} + +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base) +{ + /* defaults */ + double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us; + double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz; + + dc_assert_fp_enabled(); + + /* Set A - Normal - default values*/ + base->bw_params->wm_table.nv_entries[WM_A].valid = true; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher minimum clocks */ +// base->bw_params->wm_table.nv_entries[WM_B].valid = true; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + 
base->bw_params->wm_table.nv_entries[WM_C].valid = true; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; + base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; + base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; + base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; + base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + + /* Set D - MALL - SR enter and exit times adjusted for MALL */ + base->bw_params->wm_table.nv_entries[WM_D].valid = true; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip) +{ + dc_assert_fp_enabled(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index dedfe7b5f173..cab864095ce7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -63,5 +63,14 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, unsigned int *dcfclk_mhz, unsigned int *dram_speed_mts); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); + +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st 
*dcn3_0_ip); #endif /* __DCN30_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 842eb94ebe04..876b321b30ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3037,40 +3037,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double MaxPerPlaneVActiveWRBandwidth = 0; - double WRBandwidth = 0; - double MaxUsedBW = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - if (v->WritebackEnable[k] == true - && v->WritebackPixelFormat[k] == dm_444_32) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; - } else if (v->WritebackEnable[k] == true) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; - } - TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; - MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); - } - v->TotalDataReadBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; } - - { - double MaxPerPlaneVActiveRDBandwidth = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth, - v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); - - } - } - - MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth; } // VStartup Margin diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index e4863f0bf0f6..7ef66e511ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -214,6 +214,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +struct wm_table ddr4_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 6.09, + .sr_enter_plus_exit_time_us = 7.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr5_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + 
.wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + } +}; + static void calculate_wm_set_for_vlevel(int vlevel, struct wm_range_table_entry *table_entry, struct dcn_watermarks *wm_set, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 7be3476989ce..e36cfa5985ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,6 +435,26 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} + +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + dc_assert_fp_enabled(); + + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 24ac19c83687..4372f17b55d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,11 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index c94cf6e01e25..66b82e4f05c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -866,7 +866,6 @@ static void dml_rq_dlg_get_dlg_params( { const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; - const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; @@ -916,9 +915,6 @@ static void dml_rq_dlg_get_dlg_params( unsigned int vupdate_width; unsigned int vready_offset; - unsigned int dppclk_delay_subtotal; - unsigned int dispclk_delay_subtotal; - unsigned int vstartup_start; unsigned int dst_x_after_scaler; unsigned int dst_y_after_scaler; @@ -1037,21 +1033,6 @@ static void dml_rq_dlg_get_dlg_params( vupdate_width = dst->vupdate_width; vready_offset = dst->vready_offset; - 
dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; - dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; - - if (scl_enable) - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; - else - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; - - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; - - if (dout->dsc_enable) { - double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA - dispclk_delay_subtotal += dsc_delay; - } - vstartup_start = dst->vstartup_start; if (interlaced) { if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 6101c962ab0a..fc4d7474c111 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -2994,7 +2994,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ImmediateFlipSupportedForPipe[k] == false) { #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Pipe %0d not supporing iflip\n", __func__, k); + dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); #endif v->ImmediateFlipSupported = false; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c new file mode 100644 index 000000000000..66453546e24f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -0,0 +1,2291 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ +#include "dcn32_fpu.h" +#include "dc_link_dp.h" +#include "dcn32/dcn32_resource.h" +#include "dcn20/dcn20_resource.h" +#include "display_mode_vba_util_32.h" +// We need this include for the WATERMARKS_* defines +#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#include "dcn30/dcn30_resource.h" + +#define DC_LOGGER_INIT(logger) + +struct _vcs_dpi_ip_params_st dcn3_2_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 16000.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 20.16, + .sr_enter_plus_exit_time_us = 27.13, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, +
.urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = 
min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit times specific to MALL; TBD after bringup or in a later phase, for now use DRAM values / 2 */ + /* For MALL the DRAM clock change latency is N/A; for watermark calculations use the lowest dummy P-state latency */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us =
clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + +/** + * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes + * and populate pipe_ctx with those params. + * + * This function must be called AFTER the phantom pipes are added to context + * and run through DML (so that the DLG params for the phantom pipes can be + * populated), and BEFORE we program the timing for the phantom pipes. + * + * @dc: [in] current dc state + * @context: [in] new dc state + * @pipes: [in] DML pipe params array + * @pipe_cnt: [in] DML pipe count + */ +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + uint32_t i, pipe_idx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipes[pipe_idx].pipe.dest.vstartup_start = + get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = + get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = + get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = + get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; + } + pipe_idx++; + } +} + +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) +{ + double pscl_throughput; + double pscl_throughput_chroma; + double dpp_clk_single_dpp, clock; + double clk_frequency = 0.0; + double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; + + dc_assert_fp_enabled(); + + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, + pipe.scale_ratio_depth.hscl_ratio_c, + pipe.scale_ratio_depth.vscl_ratio, + pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe.dest.pixel_rate_mhz, + pipe.src.source_format, + pipe.scale_taps.htaps, + pipe.scale_taps.htaps_c, + pipe.scale_taps.vtaps, + pipe.scale_taps.vtaps_c, + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); + + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + /* Quantize the derated requirement to the nearest achievable DENTIST + * frequency at or above it (the divider steps in units of 0.25, hence + * the factor of 4); the original expression omitted "/ clock", which + * left "clock" unused and made the comparison always evaluate to ~1.0. + */ + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0 / clock)); + + if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) + return true; + else + return false; +} +
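[Illustrative aside, not part of the patch: the split prediction above boils down to three steps — inflate the single-DPP DPPCLK requirement by the DCN downspread, quantize it to a frequency the DENTIST divider can actually generate from the VCO (quarter-step dividers, hence the factor of 4), and compare against the DPPCLK limit of the selected state. The standalone C sketch below shows only that arithmetic; the function and variable names are hypothetical, and the DML throughput call and state lookup are elided.]

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical helper mirroring the decision in dcn32_predict_pipe_split():
 * split the pipe across two DPPs when a single DPP cannot be clocked fast
 * enough for the required throughput.
 */
static bool predict_pipe_split_sketch(double dpp_clk_single_dpp_mhz,
				      double dcn_downspread_pct,
				      double vco_speed_mhz,
				      double state_dppclk_limit_mhz)
{
	/* Derate the requirement for clock downspread */
	double clock = dpp_clk_single_dpp_mhz * (1 + dcn_downspread_pct / 100);
	double clk_frequency = 0.0;

	/* Achievable frequency = VCO divided by an integer count of quarter
	 * steps; flooring the divisor rounds the frequency up to >= clock.
	 */
	if (clock > 0)
		clk_frequency = vco_speed_mhz * 4.0 /
				(int)(vco_speed_mhz * 4.0 / clock);

	return clk_frequency > state_dppclk_limit_mhz;
}

int main(void)
{
	/* 4300 MHz VCO and 0.5% downspread match the dcn3_2_soc defaults;
	 * 2150 MHz is the table's state-0 DPPCLK limit.
	 */
	printf("split needed: %d\n",
	       predict_pipe_split_sketch(2140.0, 0.5, 4300.0, 2150.0));
	return 0;
}

[With these numbers the derated requirement is about 2150.7 MHz, the nearest DENTIST frequency at or above it is about 2457 MHz, and that exceeds the 2150 MHz limit, so the predicate reports that a split is needed.]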
+static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * + dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * + ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return limiting_bw_kbytes_sec; +} + +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + get_optimal_ntuple(entry); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + +/** + * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream + * + * Set timing params of the phantom stream based on calculated output from DML. 
+ * This function first gets the DML pipe index using the DC pipe index, then + * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of + * lines required for SubVP MCLK switching and assigns to the phantom stream + * accordingly. + * + * - The number of SubVP lines calculated in DML does not take into account + * FW processing delays and required pstate allow width, so we must include + * that separately. + * + * - Set phantom backporch = vstartup of main pipe + * + * @dc: current dc state + * @context: new dc state + * @ref_pipe: Main pipe for the phantom stream + * @pipes: DML pipe params + * @pipe_cnt: number of DML pipes + * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) + */ +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx) +{ + unsigned int i, pipe_idx; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; + unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; + + dc_assert_fp_enabled(); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + + pipe_idx++; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // Update clks_cfg for calling into recalculate + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = socclk; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + + pstate_width_fw_delay_lines; + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; +} + +/** + * dcn32_get_num_free_pipes: Calculate number of free pipes + * + * This function assumes that a "used" pipe is a pipe that has + * both a stream and a plane assigned to it. 
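+ * Pipes chained via bottom_pipe (MPC splits) are counted as used too, so a
+ * split stream consumes several of the pool's pipes.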
+ * + * @dc: current dc state + * @context: new dc state + * + * Return: + * Number of free pipes available in the context + */ +static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = dc->res_pool->pipe_count - num_pipes; + return free_pipes; +} + +/** + * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). + * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * @param dc: current dc state + * @param context: new dc state + * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * Return: + * True if a valid pipe assignment was found for Sub-VP. Otherwise false. + */ +static bool dcn32_assign_subvp_pipe(struct dc *dc, + struct dc_state *context, + unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/** + * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. 
+ * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = dcn32_get_num_free_pipes(dc, context); + + // SubVP only possible if at least one pipe is being used (i.e. free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/** + * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
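+		 * subvp_pipes[] is indexed unconditionally for the worst-case
+		 * overlap check below, so exactly two SubVP main pipes are
+		 * expected once this loop finishes.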
+ */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + phantom = pipe->stream->mall_stream_config.paired_stream; + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us + + dc->caps.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/** + * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching
+ * (the margin is equal to the MALL region + DRR margin (500us))
+ * 3. If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame))
+ * then report the configuration as supported
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config
+ *
+ * Return:
+ * bool - True if the SubVP + DRR config is schedulable, false otherwise
+ */
+static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe)
+{
+	bool schedulable = false;
+	uint32_t i;
+	struct pipe_ctx *pipe = NULL;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *drr_timing = NULL;
+	int16_t prefetch_us = 0;
+	int16_t mall_region_us = 0;
+	int16_t drr_frame_us = 0;	// nominal frame time
+	int16_t subvp_active_us = 0;
+	int16_t stretched_drr_us = 0;
+	int16_t drr_stretched_vblank_us = 0;
+	int16_t max_vblank_mallregion = 0;
+
+	// Find SubVP pipe
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		// Find the SubVP pipe
+		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			break;
+	}
+
+	main_timing = &pipe->stream->timing;
+	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	drr_timing = &drr_pipe->stream->timing;
+	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+			dc->caps.subvp_prefetch_end_to_mall_start_us;
+	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+	// P-State allow width and FW delays already included in phantom_timing->v_addressable
+	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+	 * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+	 * and the max of (VBLANK blanking time, MALL region)).
+	 */
+	if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+			subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+		schedulable = true;
+
+	return schedulable;
+}
+
+
+/**
+ * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time))
+ * then report the configuration as supported
+ * 3. If the VBLANK display is DRR, then take the DRR static schedulability path
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ *
+ * Return:
+ * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise
+ */
+static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context)
+{
+	struct pipe_ctx *pipe = NULL;
+	struct pipe_ctx *subvp_pipe = NULL;
+	bool found = false;
+	bool schedulable = false;
+	uint32_t i = 0;
+	uint8_t vblank_index = 0;
+	uint16_t prefetch_us = 0;
+	uint16_t mall_region_us = 0;
+	uint16_t vblank_frame_us = 0;
+	uint16_t subvp_active_us = 0;
+	uint16_t vblank_blank_us = 0;
+	uint16_t max_vblank_mallregion = 0;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *vblank_timing = NULL;
+
+	/* For SubVP + VBLANK/DRR cases, we assume there can only be
+	 * a single VBLANK/DRR display. If DML reports that SubVP + VBLANK
+	 * is supported, it is either a single VBLANK case or two VBLANK
+	 * displays which are synchronized (in which case they have identical
+	 * timings).
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+			// Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe).
+			vblank_index = i;
+			found = true;
+		}
+
+		if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			subvp_pipe = pipe;
+	}
+	// Use ignore_msa_timing_param flag to identify as DRR
+	if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) {
+		// SUBVP + DRR case
+		schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]);
+	} else if (found) {
+		main_timing = &subvp_pipe->stream->timing;
+		phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing;
+		vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing;
+		// Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe
+		// Also include the prefetch end to mallstart delay time
+		prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+				dc->caps.subvp_prefetch_end_to_mall_start_us;
+		// P-State allow width and FW delays already included in phantom_timing->v_addressable
+		mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+				(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+		vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total /
+				(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+		vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total /
+				(double)(vblank_timing->pix_clk_100hz * 100) * 1000000;
+		subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+				(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+		max_vblank_mallregion = vblank_blank_us > mall_region_us ?
vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +/** + * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle + * static analysis based on the case. + * + * Three cases: + * 1. SubVP + SubVP + * 2. SubVP + VBLANK (DRR checked internally) + * 3. SubVP + VACTIVE (currently unsupported) + * + * @dc: current dc state + * @context: new dc state + * @vlevel: Voltage level calculated by DML + * + * Return: + * bool - True if statically schedulable, false otherwise + */ +static bool subvp_validate_static_schedulability(struct dc *dc, + struct dc_state *context, + int vlevel) +{ + bool schedulable = true; // true by default for single display case + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + uint32_t i, pipe_idx; + uint8_t subvp_count = 0; + uint8_t vactive_count = 0; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) + subvp_count++; + + // Count how many planes that aren't SubVP/phantom are capable of VACTIVE + // switching (SubVP + VACTIVE unsupported). In situations where we force + // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. + if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + vactive_count++; + } + pipe_idx++; + } + + if (subvp_count == 2) { + // Static schedulability check for SubVP + SubVP case + schedulable = subvp_subvp_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { + // Static schedulability check for SubVP + VBLANK case. Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +static void dcn32_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + unsigned int dc_pipe_idx = 0; + bool found_supported_config = false; + struct pipe_ctx *pipe = NULL; + uint32_t non_subvp_pipes = 0; + bool drr_pipe_found = false; + uint32_t drr_pipe_index = 0; + uint32_t i = 0; + + dc_assert_fp_enabled(); + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. 
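+	 * (Capping starts at dm_prefetch_support_uclk_fclk_and_stutter; the
+	 * fallback inside the loop below drops to dm_prefetch_support_stutter,
+	 * and the caller can later retry with dm_prefetch_support_fclk_and_stutter.)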
+	 */
+	context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+			dm_prefetch_support_uclk_fclk_and_stutter;
+	*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+	/* This may adjust vlevel and maxMpcComb */
+	if (*vlevel < context->bw_ctx.dml.soc.num_states)
+		*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+	/* Conditions for setting up phantom pipes for SubVP:
+	 * 1. SubVP is not force disabled
+	 * 2. Full update (i.e. !fast_validate)
+	 * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+	 * 4. Display configuration passes validation
+	 * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+	 */
+	if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
+	    !dcn32_mpo_in_use(context) && (*vlevel == context->bw_ctx.dml.soc.num_states ||
+	    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
+	    dc->debug.force_subvp_mclk_switch)) {
+
+		dcn32_merge_pipes_for_subvp(dc, context);
+
+		while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
+			dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
+			/* For the case where *vlevel = num_states, bandwidth validation has failed for this config.
+			 * Adding phantom pipes won't change the validation result, so change the DML input param
+			 * for P-State support before adding phantom pipes and recalculating the DML result.
+			 * However, this case is only applicable for SubVP + DRR cases because the prefetch mode
+			 * will not allow for switch in VBLANK. The DRR display must have its VBLANK stretched
+			 * enough to support MCLK switching.
+			 */
+			if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+				context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+						dm_prefetch_support_stutter;
+				/* There are params (such as FabricClock) that need to be recalculated
+				 * after validation fails (otherwise it will be 0). Calculation for
+				 * phantom vactive requires a call into DML, so we must ensure all the
+				 * vba params are valid, otherwise we'll get incorrect phantom vactive.
+				 */
+				*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+			}
+
+			dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
+
+			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+			// Populate dppclk to trigger a recalculate in dml_get_voltage_level
+			// so the phantom pipe DLG params can be assigned correctly.
+			pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
+			*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+
+			if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+			    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported
+			    && subvp_validate_static_schedulability(dc, context, *vlevel)) {
+				found_supported_config = true;
+			} else if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+				   vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+				/* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles
+				 * the case for SubVP + DRR, where the DRR display does not support MCLK switch
+				 * at its native refresh rate / timing.
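+				 * Stretching the DRR VBLANK (raising v_total at the
+				 * same pixel clock) lowers the effective refresh rate,
+				 * which is what opens a window long enough for the
+				 * UCLK switch.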
+				 */
+				for (i = 0; i < dc->res_pool->pipe_count; i++) {
+					pipe = &context->res_ctx.pipe_ctx[i];
+					if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+					    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+						non_subvp_pipes++;
+						// Use ignore_msa_timing_param flag to identify as DRR
+						if (pipe->stream->ignore_msa_timing_param) {
+							drr_pipe_found = true;
+							drr_pipe_index = i;
+						}
+					}
+				}
+				// If there is only 1 remaining non SubVP pipe that is DRR, check static
+				// schedulability for SubVP + DRR.
+				if (non_subvp_pipes == 1 && drr_pipe_found) {
+					found_supported_config = subvp_drr_schedulable(dc, context,
+							&context->res_ctx.pipe_ctx[drr_pipe_index]);
+				}
+			}
+		}
+
+		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
+		// remove phantom pipes and repopulate dml pipes
+		if (!found_supported_config) {
+			dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
+			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+		} else {
+			// only call dcn20_validate_apply_pipe_split_flags if we found a supported config
+			memset(split, 0, MAX_PIPES * sizeof(int));
+			memset(merge, 0, MAX_PIPES * sizeof(bool));
+			*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+			// Must populate phantom DLG params before programming hardware / timing for phantom pipe
+			DC_FP_START();
+			dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
+			DC_FP_END();
+
+			// Note: We can't apply the phantom pipes to hardware at this time. We have to wait
+			// until driver has acquired the DMCUB lock to do it safely.
+		}
+	}
+}
+
+static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
+{
+	int i;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		if (!context->res_ctx.pipe_ctx[i].stream)
+			continue;
+		if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
+			return true;
+	}
+	return false;
+}
+
+static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
+				       display_e2e_pipe_params_st *pipes,
+				       int pipe_cnt, int vlevel)
+{
+	int i, pipe_idx;
+	bool usr_retraining_support = false;
+	bool unbounded_req_enabled = false;
+
+	dc_assert_fp_enabled();
+
+	/* Writeback MCIF_WB arbitration parameters */
+	dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
+
+	context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000;
+	context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000;
+	context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000;
+	context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16;
+	context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000;
+	context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000;
+	context->bw_ctx.bw.dcn.clk.p_state_change_support =
+			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]
+			!= dm_dram_clock_change_unsupported;
+	context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context);
+
+	context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+	context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
+	context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000;
+	if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] ==
dm_fclock_change_unsupported) + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + ASSERT(usr_retraining_support); + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. + ASSERT(false); + unbounded_req_enabled = false; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz + * 1000; + + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, + pipe_cnt, pipe_idx); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, + &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe_idx++; + } +} + +static struct pipe_ctx *dcn32_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = 
NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static bool dcn32_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (odm && pri_pipe->plane_state) { + /* ODM + window MPO, where MPO window is on left half only */ + if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= + pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + + /* ODM + window MPO, where MPO window is on right half only */ + if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(right). 
pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + } + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + dc_assert_fp_enabled(); + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes first + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) { + DC_FP_START(); + dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + DC_FP_END(); + } + + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. 
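+		 * ("Favoring voltage" means letting DML settle on a higher
+		 * voltage state, possibly without p-state support, instead of
+		 * failing validation outright.)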
+ * + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if + * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) + */ + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_fclk_and_stutter; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */ + if (vlevel == context->bw_ctx.dml.soc.num_states) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + } + + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && !dc->config.enable_windowed_mpo_odm + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if 
(!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if (!dcn20_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + + +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = 
context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+	bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] !=
+			dm_dram_clock_change_unsupported;
+	unsigned int dummy_latency_index = 0;
+	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+	unsigned int min_dram_speed_mts_margin;
+
+	dc_assert_fp_enabled();
+
+	// Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching its VBLANK
+	if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
+		context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+		pstate_en = true;
+	}
+
+	context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
+
+	if (!pstate_en) {
+		/* the fw based vblank stretch is only attempted when the mclk switch cannot happen naturally */
+		context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching =
+			dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+
+		if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+			dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+				context, pipes, pipe_cnt, vlevel);
+
+			/* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch
+			 * we reinstate the original dram_clock_change_latency_us on the context
+			 * and all variables that may have changed up to this point, except the
+			 * newly found dummy_latency_index
+			 */
+			context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+			dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+			maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
+			dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
+			pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] !=
+				dm_dram_clock_change_unsupported;
+		}
+	}
+
+	/* Set B:
+	 * For Set B calculations use clocks from clock_limits[2] when available, i.e. when SMU is present,
+	 * otherwise use an arbitrary low value from the spreadsheet for DCFCLK as lower is safer for watermark
+	 * calculations to cover bootup clocks.
+ * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * All clocks min. 
+ * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set C, for Dummy P-State: + * All clocks min. 
+ * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + + // For Set A and Set C use values from validation + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts_margin = 160; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == + dm_dram_clock_change_unsupported) { + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + + min_dram_speed_mts = + dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; + } + + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* find largest table entry that is lower than dram speed, + * but lower than DPM0 still uses DPM0 + */ + for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts) + break; + } + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency, if p-state is not supported + * with full p-state latency we want to calculate DLG 
based on dummy p-state latency, + * Set A p-state watermark set to 0 on DCN30, when p-state unsupported, for now keep as DCN30. + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); +} + +static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * 
(dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the 
DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + + // Insert the UCLK DPMs + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + } + // If FCLK is fine grained, only insert the max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PM FW and STAs. The table is sorted by BW, and all clock + // ratios (up to derating) are exact. + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest; + // it could be used as is, but some states may be redundant due to the + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. + + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to the next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to the minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to the minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states; note that duplicate states are always neighbouring since the table is sorted. 
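The dedup pass that follows only works because insert_entry_into_table_sorted() keeps the table ordered by bandwidth, so equal states always end up adjacent. A minimal user-space sketch of the same shift-down removal idiom, not part of the patch; the names (state_t, remove_at, dedup) are hypothetical:

```c
#include <stdio.h>
#include <string.h>

typedef struct {
	unsigned int dcfclk_mhz, fabricclk_mhz, dram_speed_mts;
} state_t;

/* Shift-down removal, same shape as remove_entry_from_table_at_index(). */
static void remove_at(state_t *t, unsigned int *n, unsigned int idx)
{
	memmove(&t[idx], &t[idx + 1], (*n - idx - 1) * sizeof(t[0]));
	(*n)--;
}

/* Neighbouring-duplicate removal; only valid on a sorted table. */
static void dedup(state_t *t, unsigned int *n)
{
	unsigned int i = 0;

	while (i + 1 < *n) {
		if (t[i].dcfclk_mhz == t[i + 1].dcfclk_mhz &&
		    t[i].fabricclk_mhz == t[i + 1].fabricclk_mhz &&
		    t[i].dram_speed_mts == t[i + 1].dram_speed_mts)
			remove_at(t, n, i + 1);
		else
			i++;
	}
}

int main(void)
{
	state_t t[] = { {199, 300, 3200}, {199, 300, 3200}, {615, 900, 6400} };
	unsigned int n = 3;

	dedup(t, &n);
	printf("%u states remain\n", n);	/* prints "2 states remain" */
	return 0;
}
```

Because rounding UCLK/FCLK up to real DPM levels can only make previously distinct entries equal, running the dedup after the round-up passes is exactly the right order.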
+ i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indices + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +/** + * dcn32_update_bw_bounding_box + * + * This overrides some dcn3_2 ip_or_soc initial parameters hardcoded from the + * spreadsheet with actual values as per the dGPU SKU: + * - with a few options passed in from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with latency values (passed in ns units) from the dc->bb override, for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKUs + * - with the number of DRAM channels from VBIOS (which differs for certain + * dGPU SKUs of the same ASIC) + * - clock levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKUs of the + * same ASIC) + */ +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available */ + if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_2_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + dcn3_2_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_2_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_2_soc.num_chans = 
dc->ctx->dc_bios->vram_info.num_chans; + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + } + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Override clock levels from CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + unsigned int min_dcfclk = UINT_MAX; + /* Set 199 as the first value in the STA target array to have a minimum DCFCLK value. + * For DCN32 we set the min to 199 so the minimum FCLK DPM0 (300 MHz) can be achieved */ + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && + bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) + min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (min_dcfclk > dcfclk_sta_targets[0]) + dcfclk_sta_targets[0] = min_dcfclk; + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update the size of the array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate the optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if 
(optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_2_soc.num_states = num_states; + for (i = 0; i < dcn3_2_soc.num_states; i++) { + dcn3_2_soc.clock_limits[i].state = i; + dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + if 
(dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h new file mode 100644 index 000000000000..3ed06ab855be --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "clk_mgr_internal.h" + +#define DCN3_2_DEFAULT_DET_SIZE 256 +#define DCN3_2_MAX_DET_SIZE 1152 +#define DCN3_2_MIN_DET_SIZE 128 +#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); + +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + +bool dcn32_predict_pipe_split(struct dc_state *context, + display_pipe_params_st pipe, + int index); + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx); + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate); + +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 349e36ae9333..890612db08dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -30,8 +30,6 @@ #include "../dml_inline_defs.h" #include "display_mode_vba_util_32.h" -static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; - void dml32_recalculate(struct display_mode_lib *mode_lib); static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( struct display_mode_lib *mode_lib); @@ -67,6 +65,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman int iteration; double MaxTotalRDBandwidth; unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: --- START ---\n", __func__); @@ -217,6 +221,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman // VBA_DELTA // Calculate DET size, swath height dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -226,6 +231,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation @@ -287,6 +295,10 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.DETBufferSizeC, &v->UnboundedRequestEnabled, &v->CompressedBufferSizeInkByte, + &v->CompBufReservedSpaceKBytes, + &v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean, /* bool *CompBufReservedSpaceNeedAdjustment */ v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */ @@ -295,6 +307,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman .dummy_boolean); /* bool *ViewportSizeSupport */ } + v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0; + v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0; + // DCFCLK Deep Sleep dml32_CalculateDCFCLKDeepSleep( mode_lib->vba.NumberOfActiveSurfaces, @@ -446,6 +461,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, v->SurfaceSizeInMALL, @@ -702,11 +718,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; do { - double MaxTotalRDBandwidthNoUrgentBurst = 0.0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThanMax = false; - double dummy_unit_vector[DC__NUM_DPP__MAX]; - MaxTotalRDBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); @@ -715,41 +726,41 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* NOTE PrefetchMode variable is invalid in DAL as per the input received. 
* Hence the direction is to use PrefetchModePerState. */ - double TWait = dml32_CalculateTWait( - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.UsesMALLForPStateChange[k], - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay[k], - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - DmlPipe myPipe; - - myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; - myPipe.Dispclk = mode_lib->vba.DISPCLK; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; - myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; - myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; - myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; - myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &myPipe, v->DSCDelay[k], + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + 
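The long v->dummy_vars assignments running through this hunk are all one refactor: every large on-stack local (the myPipe struct, the dummy vectors, singles and booleans) moves into a scratch area preallocated inside the heap-allocated mode_lib state, since DML's deeply nested FP code was blowing the kernel stack (the "DC DML stack usage fixes" from the merge summary). A compilable, hedged sketch of the pattern, with hypothetical names (calc_scratch, lib_state):

```c
#include <stdio.h>

#define NUM_DPP_MAX 8

/* Hypothetical scratch area: large temporaries live here instead of on
 * the stack of every helper in a deep call chain. */
struct calc_scratch {
	double dummy_unit_vector[NUM_DPP_MAX];
	double dummy_single[2];
};

/* Stands in for the heap-allocated display_mode_lib/vba state. */
struct lib_state {
	struct calc_scratch scratch;
};

static double sum(const double *w, int n)
{
	double s = 0.0;

	for (int i = 0; i < n; i++)
		s += w[i];
	return s;
}

static double calc(struct lib_state *v)
{
	/* Before the refactor this was "double dummy_unit_vector[8];" on
	 * the stack; afterwards the preallocated scratch is reused, so a
	 * deep chain of such helpers adds no stack frames of array size. */
	for (int k = 0; k < NUM_DPP_MAX; k++)
		v->scratch.dummy_unit_vector[k] = 1.0;

	return sum(v->scratch.dummy_unit_vector, NUM_DPP_MAX);
}

int main(void)
{
	struct lib_state v = {0};

	printf("%.1f\n", calc(&v));	/* prints 8.0 */
	return 0;
}
```

The trade-off is verbosity (the very long v->dummy_vars... access paths seen here) in exchange for bounded stack usage; reusing one scratch area is safe only as long as calls into the same mode_lib are serialized, which is how these DML validation paths are invoked.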
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + &v->dummy_vars.dml32_CalculatePrefetchSchedule, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, @@ -898,8 +909,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif { - double dummy_single[1]; - dml32_CalculatePrefetchBandwithSupport( mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, @@ -923,16 +932,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &MaxTotalRDBandwidth, - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->PrefetchModeSupported); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) - dummy_unit_vector[k] = 1.0; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; { - double dummy_single[1]; - bool dummy_boolean[1]; dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, 
v->NoUrgentLatencyHidingPre, @@ -946,17 +953,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->FractionOfUrgentBandwidth, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { @@ -1039,8 +1046,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } { - double dummy_single[2]; - bool dummy_boolean[1]; dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, mode_lib->vba.ImmediateFlipRequirement, @@ -1064,7 +1069,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth - &dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, @@ -1081,17 +1086,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[1], // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth &v->FractionOfUrgentBandwidthImmediateFlip, // Single 
*FractionOfUrgentBandwidth - &dummy_boolean[0]); // Boolean *ImmediateFlipBandwidthSupport + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { @@ -1149,24 +1154,20 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman //Watermarks and NB P-State/DRAM Clock Change Support { - SOCParametersList mmSOCParameters; - enum clock_change_support dummy_dramchange_support; - enum dm_fclock_change_support dummy_fclkchange_support; - bool dummy_USRRetrainingSupport; - - mmSOCParameters.UrgentLatency = v->UrgentLatency; - mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; - mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; - mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; - mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; - mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; - mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; - mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; - mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; - mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; - mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], @@ -1182,7 +1183,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dpte_group_bytes, v->meta_row_height, v->meta_row_height_chroma, - mmSOCParameters, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, mode_lib->vba.WritebackChunkSize, mode_lib->vba.SOCCLK, v->DCFCLKDeepSleep, @@ -1219,12 +1220,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* Output */ &v->Watermark, - &dummy_dramchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, - &dummy_fclkchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, &v->MinActiveFCLKChangeLatencySupported, - &dummy_USRRetrainingSupport, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); /* DCN32 has a new struct Watermarks (typedef) which is used to store @@ -1486,9 +1487,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double WRBandwidth = 0; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { @@ -1532,8 +1530,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - mode_lib->vba.CompbufReservedSpace64B, - mode_lib->vba.CompbufReservedSpaceZs, + v->CompbufReservedSpace64B, + v->CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1582,9 +1580,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #ifdef __DML_VBA_ALLOW_DELTA__ { - double dummy_single[2]; unsigned int dummy_integer[1]; - bool dummy_boolean[1]; // Calculate z8 stutter eff assuming 0 reserved space dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, @@ -1598,8 +1594,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - 0, //mode_lib->vba.CompbufReservedSpace64B, - 0, //mode_lib->vba.CompbufReservedSpaceZs, + 0, //CompbufReservedSpace64B, + 0, //CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1637,14 +1633,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->meta_row_bw, v->dpte_row_bw, /* Output */ - &dummy_single[0], - &dummy_single[1], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], &dummy_integer[0], &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, &v->Z8StutterEfficiencyBestCase, &v->Z8NumberOfStutterBurstsPerFrameBestCase, &v->StutterPeriodBestCase, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } #else v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; @@ -1658,31 
+1654,89 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif } +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && !mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.NonsupportedDSCInputBPC == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && (!mode_lib->vba.USRRetrainingRequiredFinal + || mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { - unsigned int dummy_integer[4]; - 
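The new mode_support_configuration() helper above collapses what was previously an inline wall of checks into a single predicate per (voltage state, MPC combine) pair, and the locals being deleted below move into v->dummy_vars for the same stack-size reason. One detail worth calling out: a state that cannot do a DRAM or FCLK clock change still passes if it is the highest voltage state, since no other state could do better. A condensed, hypothetical sketch of that shape (support_flags and eval_mode_support are illustrative stand-ins, not the kernel API):

```c
#include <stdbool.h>
#include <stdio.h>

#define NUM_STATES 4

/* Hypothetical, condensed stand-ins for the dozens of real checks. */
struct support_flags {
	bool basic_ok[NUM_STATES][2];		/* all unconditional checks */
	bool dram_change_ok[NUM_STATES][2];	/* p-state switch possible  */
	bool dram_change_required;
};

static void eval_mode_support(const struct support_flags *f,
			      bool mode_support[NUM_STATES][2])
{
	for (int i = NUM_STATES - 1; i >= 0; i--) {
		for (int j = 0; j < 2; j++) {
			/* Mirrors the real predicate's escape hatch: the
			 * top state (i == NUM_STATES - 1) is acceptable
			 * even without DRAM clock change support. */
			mode_support[i][j] = f->basic_ok[i][j] &&
				(!f->dram_change_required ||
				 i == NUM_STATES - 1 ||
				 f->dram_change_ok[i][j]);
		}
	}
}

int main(void)
{
	struct support_flags f = { .dram_change_required = true };
	bool ms[NUM_STATES][2];

	for (int i = 0; i < NUM_STATES; i++)
		f.basic_ok[i][0] = f.basic_ok[i][1] = true;

	eval_mode_support(&f, ms);
	/* Only the top state passes when no state supports p-state switch. */
	printf("top state supported: %d\n", ms[NUM_STATES - 1][0]);
	return 0;
}
```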
bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; - bool MPCCombineMethodAsPossible; - enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; - unsigned int TotalNumberOfActiveOTG; - unsigned int TotalNumberOfActiveHDMIFRL; - unsigned int TotalNumberOfActiveDP2p0; - unsigned int TotalNumberOfActiveDP2p0Outputs; - unsigned int TotalDSCUnitsRequired; - unsigned int m; - unsigned int ReorderingBytes; - bool FullFrameMALLPStateMethod; - bool SubViewportMALLPStateMethod; - bool PhantomPipeMALLPStateMethod; + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; unsigned int MaximumMPCCombine; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; + bool CompBufReservedSpaceNeedAdjustment; + bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); #endif - struct vba_vars_st *v = &mode_lib->vba; - - int i, j; - unsigned int k; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ @@ -1897,27 +1951,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->MaximumSwathWidthInLineBufferChroma); } - /*Number Of DSC Slices*/ - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - if (mode_lib->vba.BlendingAndTiming[k] == k) { - if (mode_lib->vba.PixelClockBackEnd[k] > 4800) { - mode_lib->vba.NumberOfDSCSlices[k] = dml_ceil(mode_lib->vba.PixelClockBackEnd[k] / 600, - 4); - } else if (mode_lib->vba.PixelClockBackEnd[k] > 2400) { - mode_lib->vba.NumberOfDSCSlices[k] = 8; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 1200) { - mode_lib->vba.NumberOfDSCSlices[k] = 4; - } else if (mode_lib->vba.PixelClockBackEnd[k] > 340) { - mode_lib->vba.NumberOfDSCSlices[k] = 2; - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 1; - } - } else { - mode_lib->vba.NumberOfDSCSlices[k] = 0; - } - } - dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -1927,6 +1962,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -1951,7 +1989,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockHeightC, mode_lib->vba.Read256BlockWidthY, mode_lib->vba.Read256BlockWidthC, - dummy_odm_mode, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, mode_lib->vba.BlendingAndTiming, mode_lib->vba.BytePerPixelY, mode_lib->vba.BytePerPixelC, @@ -1974,38 +2012,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */ + 
&CompBufReservedSpaceNeedAdjustmentSingleDPP, mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ - MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; - MPCCombineMethodAsPossible = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) - MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible) - MPCCombineMethodAsPossible = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; } - mode_lib->vba.MPCCombineMethodIncompatible = MPCCombineMethodAsNeededForPStateChangeAndVoltage - && MPCCombineMethodAsPossible; + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { - bool NoChroma; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - - bool TotalAvailablePipesSupportNoDSC; - unsigned int NumberOfDPPNoDSC; - enum odm_combine_mode ODMModeNoDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceNoDSC; - bool TotalAvailablePipesSupportDSC; - unsigned int NumberOfDPPDSC; - enum odm_combine_mode ODMModeDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceDSC; - dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, mode_lib->vba.HActive[k], @@ -2022,10 +2053,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportNoDSC, - &NumberOfDPPNoDSC, - &ODMModeNoDSC, - &RequiredDISPCLKPerSurfaceNoDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, @@ -2043,10 +2074,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportDSC, - &NumberOfDPPDSC, - &ODMModeDSC, - &RequiredDISPCLKPerSurfaceDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, 
+ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); dml32_CalculateOutputLink( mode_lib->vba.PHYCLKPerState[i], @@ -2064,8 +2095,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.AudioSampleRate[k], mode_lib->vba.AudioSampleLayout[k], - ODMModeNoDSC, - ODMModeDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, mode_lib->vba.DSCEnable[k], mode_lib->vba.OutputLinkDPLanes[k], mode_lib->vba.OutputLinkDPRate[k], @@ -2079,21 +2110,21 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.RequiredSlots[i][k]); if (mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeNoDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceNoDSC; - if (!TotalAvailablePipesSupportNoDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPNoDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceDSC; - if (!TotalAvailablePipesSupportDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; } } @@ -2128,7 +2159,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; - NoChroma = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.NoOfDPP[i][j][k] == 1) @@ -2138,17 +2169,26 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChroma = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; } } + // If TotalNumberOfActiveDPP is > 1 there should be no unbounded request mode (HW limitation), so the comp buf reserved-space adjustment is not needed regardless. + // If TotalNumberOfActiveDPP is == 1, the SingleDPP version of unbounded_req is used for the decision. + CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 
0 : CompBufReservedSpaceNeedAdjustmentSingleDPP; + + + if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, - mode_lib->vba.Output[0])) { + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, + mode_lib->vba.Output[0], + mode_lib->vba.SurfaceTiling[0], + CompBufReservedSpaceNeedAdjustment, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { - double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; - unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] @@ -2156,13 +2196,13 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k] > - BWOfNonCombinedSurfaceOfMaximumBandwidth && + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && (mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_2to1 && mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) && mode_lib->vba.MPCCombine[i][j][k] == false) { - BWOfNonCombinedSurfaceOfMaximumBandwidth = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k]; NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; @@ -2228,28 +2268,28 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } // i (VOLTAGE_STATE) /* Total Available OTG, HDMIFRL, DP Support Check */ - TotalNumberOfActiveOTG = 0; - TotalNumberOfActiveHDMIFRL = 0; - TotalNumberOfActiveDP2p0 = 0; - TotalNumberOfActiveDP2p0Outputs = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.BlendingAndTiming[k] == k) { - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; if (mode_lib->vba.Output[k] == dm_dp2p0) { - TotalNumberOfActiveDP2p0 = TotalNumberOfActiveDP2p0 + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; if (mode_lib->vba.OutputMultistreamId[k] == k || mode_lib->vba.OutputMultistreamEn[k] == false) { - TotalNumberOfActiveDP2p0Outputs = TotalNumberOfActiveDP2p0Outputs + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; } } } } - 
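The counting loop just above and the comparisons just below implement a simple resource budget: a surface owns an OTG only when it is the master of its own timing (BlendingAndTiming[k] == k; cloned surfaces point at another index and share that OTG), and the totals are then checked against the hardware maxima. A small sketch of the idiom, not part of the patch, with hypothetical names (timing_cfg, otg_budget_ok):

```c
#include <stdbool.h>
#include <stdio.h>

#define MAX_SURFACES 8

/* Hypothetical, condensed view of the relevant vba fields. */
struct timing_cfg {
	int blending_and_timing[MAX_SURFACES]; /* entry k == k: timing master */
	int num_surfaces;
	int max_num_otg;
};

static bool otg_budget_ok(const struct timing_cfg *c)
{
	int active_otg = 0;

	/* Only self-referencing entries consume an OTG; clones reuse the
	 * OTG of the timing they point at. */
	for (int k = 0; k < c->num_surfaces; k++)
		if (c->blending_and_timing[k] == k)
			active_otg++;

	return active_otg <= c->max_num_otg;
}

int main(void)
{
	/* Surfaces 0 and 1 are timing masters; surface 2 clones timing 0. */
	struct timing_cfg c = {
		.blending_and_timing = { 0, 1, 0 },
		.num_surfaces = 3,
		.max_num_otg = 4,
	};

	printf("OTG budget ok: %d\n", otg_budget_ok(&c));	/* prints 1 */
	return 0;
}
```

The same pattern repeats for HDMI FRL outputs and DP 2.0 streams/outputs, each with its own MaxNum* limit.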
mode_lib->vba.NumberOfOTGSupport = (TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); - mode_lib->vba.NumberOfHDMIFRLSupport = (TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); - mode_lib->vba.NumberOfDP2p0Support = (TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams - && TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); /* Display IO and DSC Support Check */ mode_lib->vba.NonsupportedDSCInputBPC = false; @@ -2264,8 +2304,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (i = 0; i < v->soc.num_states; ++i) { - unsigned int TotalSlots; - mode_lib->vba.ExceededMultistreamSlots[i] = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { @@ -2436,12 +2474,12 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /* Check DSC Unit and Slices Support */ - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; for (i = 0; i < v->soc.num_states; ++i) { mode_lib->vba.NotEnoughDSCUnits[i] = false; mode_lib->vba.NotEnoughDSCSlices[i] = false; - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.RequiresDSC[i][k] == true) { @@ -2449,33 +2487,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.HActive[k] > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 4; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; if (mode_lib->vba.NumberOfDSCSlices[k] > 16) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.HActive[k] > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 2; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; if (mode_lib->vba.NumberOfDSCSlices[k] > 8) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else { if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; if (mode_lib->vba.NumberOfDSCSlices[k] > 4) mode_lib->vba.NotEnoughDSCSlices[i] = true; } } } - if (TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) mode_lib->vba.NotEnoughDSCUnits[i] = true; } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; ++i) { - unsigned int m; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], @@ -2513,6 +2549,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -2522,6 +2559,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -2568,6 +2608,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); @@ -2707,6 +2749,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, mode_lib->vba.SurfaceSizeInMALL, @@ -2932,7 +2975,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - ReorderingBytes = mode_lib->vba.NumberOfChannels + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); @@ -2988,20 +3031,20 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); } - FullFrameMALLPStateMethod = false; - SubViewportMALLPStateMethod = false; - PhantomPipeMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if 
(mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - FullFrameMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - SubViewportMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) - PhantomPipeMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; } - mode_lib->vba.InvalidCombinationOfMALLUseForPState = (SubViewportMALLPStateMethod - != PhantomPipeMALLPStateMethod) || (SubViewportMALLPStateMethod && FullFrameMALLPStateMethod); + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { dml32_UseMinimumDCFCLK( @@ -3015,7 +3058,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SREnterPlusExitTime, mode_lib->vba.ReturnBusWidth, mode_lib->vba.RoundTripPingLatencyCycles, - ReorderingBytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.MetaChunkSize, mode_lib->vba.GPUVMEnable, @@ -3078,7 +3121,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.ReturnBWPerState[i][j] > (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLKState[i][j] - + ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { mode_lib->vba.ROBSupport[i][j] = true; } else { mode_lib->vba.ROBSupport[i][j] = false; @@ -3120,9 +3163,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i < (int) v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { - double VMDataOnlyReturnBWPerState; - double HostVMInefficiencyFactor; - unsigned int NextPrefetchModeState; mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; @@ -3162,37 +3202,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); - VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); - HostVMInefficiencyFactor = 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) - HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] - / VMDataOnlyReturnBWPerState; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( - mode_lib->vba.RoundTripPingLatencyCycles, ReorderingBytes, + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, - HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, mode_lib->vba.HostVMMaxNonCachedPageTableLevels); - NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; do { - mode_lib->vba.PrefetchModePerState[i][j] = NextPrefetchModeState; + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - DmlPipe myPipe; - mode_lib->vba.TWait = dml32_CalculateTWait( mode_lib->vba.PrefetchModePerState[i][j], mode_lib->vba.UsesMALLForPStateChange[k], @@ -3202,34 +3240,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.SREnterPlusExitTime); - myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; - myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; - myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + 
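HostVMInefficiencyFactor, converted just above, is the ratio ReturnBWPerState / VMDataOnlyReturnBWPerState when both GPUVM and host VM are enabled, and 1 otherwise; the prefetch and flip math that follows multiplies page-table byte counts by it, so PTE traffic is effectively costed at the slower VM-only return bandwidth. A sketch of that computation with assumed bandwidths (64000 and 16000 MB/s are illustrative, not values from any SoC table):

#include <stdbool.h>

static double hostvm_inefficiency(bool gpuvm_enable, bool hostvm_enable,
                                  double return_bw_mbps, double vm_only_bw_mbps)
{
        double factor = 1.0;            /* no penalty without host VM */

        if (gpuvm_enable && hostvm_enable)
                factor = return_bw_mbps / vm_only_bw_mbps;
        return factor;                  /* e.g. 64000 / 16000 = 4.0: PTE bytes cost 4x */
}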
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - HostVMInefficiencyFactor, - &myPipe, + &v->dummy_vars.dml32_CalculatePrefetchSchedule, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, @@ -3288,7 +3327,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup - &dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix } @@ -3417,7 +3456,7 @@ void 
dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - dml32_CalculateFlipSchedule(HostVMInefficiencyFactor, + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.ExtraLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.GPUVMMaxPageTableLevels, @@ -3491,7 +3530,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; - NextPrefetchModeState = NextPrefetchModeState + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } @@ -3505,7 +3544,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] - && NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.use_one_row_for_frame_this_state[k] = @@ -3527,6 +3566,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[i][j], @@ -3581,7 +3621,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Watermark, // Store the values in vba &mode_lib->vba.DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported - &dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] &mode_lib->vba.FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], @@ -3661,68 +3701,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { - for (j = 0; j < 2; j++) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i][j] == true - && !mode_lib->vba.LinkRateDoesNotMatchDPVersion - && !mode_lib->vba.LinkRateForMultistreamNotIndicated - && !mode_lib->vba.BPPForMultistreamNotIndicated - && !mode_lib->vba.MultistreamWithHDMIOreDP - && !mode_lib->vba.ExceededMultistreamSlots[i] - && !mode_lib->vba.MSOOrODMSplitWithNonDPLink - && !mode_lib->vba.NotEnoughLanesForMSO - && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP - && 
!mode_lib->vba.DSC422NativeNotSupported - && !mode_lib->vba.MPCCombineMethodIncompatible - && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true - && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && !mode_lib->vba.NotEnoughDSCSlices[i] - && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] - && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false - && !mode_lib->vba.InvalidCombinationOfMALLUseForPState - && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && mode_lib->vba.ROBSupport[i][j] == true - && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true - && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.NumberOfHDMIFRLSupport == true - && mode_lib->vba.EnoughWritebackUnits == true - && mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true - && mode_lib->vba.ViewportExceedsSurface == false - && mode_lib->vba.PrefetchSupported[i][j] == true - && mode_lib->vba.VActiveBandwithSupport[i][j] == true - && mode_lib->vba.DynamicMetadataSupported[i][j] == true - && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true - && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false - && !mode_lib->vba.ExceededMALLSize - && ((mode_lib->vba.HostVMEnable == false - && !mode_lib->vba.ImmediateFlipRequiredFinal) - || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) - && (!mode_lib->vba.DRAMClockChangeRequirementFinal - || i == v->soc.num_states - 1 - || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) - && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 - || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) - && (!mode_lib->vba.USRRetrainingRequiredFinal - || mode_lib->vba.USRRetrainingSupport[i][j])) { - mode_lib->vba.ModeSupport[i][j] = true; - } else { - mode_lib->vba.ModeSupport[i][j] = false; - } - } - } + mode_support_configuration(v, mode_lib); MaximumMPCCombine = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5a701d9df0f7..4b010b1b8aed 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -390,61 +390,8 @@ void dml32_CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( - double HRatio, - double HRatioChroma, - double VRatio, - double VRatioChroma, - double MaxDCHUBToPSCLThroughput, - double MaxPSCLToLBThroughput, - double PixelClock, - enum source_format_class SourcePixelFormat, - unsigned int HTaps, - unsigned int HTapsChroma, - unsigned int VTaps, - unsigned int VTapsChroma, - - /* output */ - double *PSCL_THROUGHPUT, - double *PSCL_THROUGHPUT_CHROMA, - double *DPPCLKUsingdoubleDPP) -{ - double DPPCLKUsingdoubleDPPLuma; - double 
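Two structural cleanups sit in this stretch: the mode-support conjunction of roughly fifty clauses collapses into a single mode_support_configuration(v, mode_lib) call, and dml32_CalculatedoublePipeDPPCLKAndSCLThroughput is dropped from this file. The factoring pattern, sketched under hypothetical names (support_flags, config_is_supported and NUM_STATES are illustrations, not DML identifiers):

#include <stdbool.h>

#define NUM_STATES 4                    /* assumed, stands in for soc.num_states */

struct support_flags {
        bool viewport_ok[NUM_STATES][2];
        bool rob_ok[NUM_STATES][2];
        bool pipes_ok[NUM_STATES][2];
        bool mode_support[NUM_STATES][2];
};

static bool config_is_supported(const struct support_flags *f, int i, int j)
{
        /* the real helper ANDs ~50 such flags; three shown for shape */
        return f->viewport_ok[i][j] && f->rob_ok[i][j] && f->pipes_ok[i][j];
}

static void classify_all(struct support_flags *f)
{
        for (int i = NUM_STATES - 1; i >= 0; i--)
                for (int j = 0; j < 2; j++)
                        f->mode_support[i][j] = config_is_supported(f, i, j);
}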
DPPCLKUsingdoubleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / - dml_ceil((double) HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingdoubleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / - *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingdoubleDPPLuma < 2 * PixelClock) - DPPCLKUsingdoubleDPPLuma = 2 * PixelClock; - - if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && - SourcePixelFormat != dm_rgbe_alpha)) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingdoubleDPP = DPPCLKUsingdoubleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * - HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingdoubleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), - HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingdoubleDPPChroma < 2 * PixelClock) - DPPCLKUsingdoubleDPPChroma = 2 * PixelClock; - *DPPCLKUsingdoubleDPP = dml_max(DPPCLKUsingdoubleDPPLuma, DPPCLKUsingdoubleDPPChroma); - } -} - void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -454,6 +401,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -501,24 +451,20 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpSwathSizeBytesY; - unsigned int RoundedUpSwathSizeBytesC; - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; unsigned int k; - unsigned int TotalActiveDPP = 0; - bool NoChromaSurfaces = true; - unsigned int DETBufferSizeInKByteForSwathCalculation; + + st_vars->TotalActiveDPP = 0; + st_vars->NoChromaSurfaces = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); + dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); #endif dml32_CalculateSwathWidth(ForceSingleDPP, NumberOfActiveSurfaces, @@ -548,48 +494,64 @@ void dml32_CalculateSwathAndDETConfiguration( DPPPerSurface, /* Output */ - SwathWidthdoubleDPP, - SwathWidthdoubleDPPChroma, + 
st_vars->SwathWidthdoubleDPP, + st_vars->SwathWidthdoubleDPPChroma, SwathWidth, SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, + st_vars->MaximumSwathHeightY, + st_vars->MaximumSwathHeightC, swath_width_luma_ub, swath_width_chroma_ub); for (k = 0; k < NumberOfActiveSurfaces; ++k) { - RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; - RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; + st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; + st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesY[k]); + st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesC[k]); + st_vars->RoundedUpMaxSwathSizeBytesC[k]); #endif if (SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); - RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChromaSurfaces = false; + st_vars->NoChromaSurfaces = false; } } - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, - NoChromaSurfaces, Output[0]); + // By default, just set the reserved space to 2 pixel chunks size + *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; + + // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data + // - assume worst-case compression rate of 4. 
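Concretely, the reserved space defaults to two pixel chunks and only widens when the ROB could otherwise hold more than that swath budget; the divide by 512 below follows the patch verbatim. A worked check under assumed sizes (128 KB ROB, 8 KB chunks, 64 KB worst-case luma swath; none of these come from a DCN table):

#include <stdbool.h>

static unsigned int comp_buf_reserved_kbytes(void)
{
        unsigned int rob_kbytes = 128;              /* assumed ROB size */
        unsigned int pixel_chunk_kbytes = 8;        /* assumed chunk size */
        unsigned int max_swath_bytes_y = 64 * 1024; /* assumed worst-case swath */
        unsigned int reserved = pixel_chunk_kbytes * 2;        /* default: two chunks */
        bool need_adjustment = (int)rob_kbytes - (int)reserved >
                               (int)(max_swath_bytes_y / 512); /* 112 > 128: false */

        if (need_adjustment)
                reserved = rob_kbytes - max_swath_bytes_y / 512;
        return reserved;                            /* stays 16 KB with these numbers */
}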
[ROB size - 8 * swath_size / max_compression ratio] + // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); + + if (*CompBufReservedSpaceNeedAdjustment == 1) { + *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; + } + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); + #endif + + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -604,8 +566,8 @@ void dml32_CalculateSwathAndDETConfiguration( SourcePixelFormat, ReadBandwidthLuma, ReadBandwidthChroma, - RoundedUpMaxSwathSizeBytesY, - RoundedUpMaxSwathSizeBytesC, + st_vars->RoundedUpMaxSwathSizeBytesY, + st_vars->RoundedUpMaxSwathSizeBytesC, DPPPerSurface, /* Output */ @@ -613,7 +575,7 @@ void dml32_CalculateSwathAndDETConfiguration( CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); @@ -624,42 +586,42 @@ void dml32_CalculateSwathAndDETConfiguration( *ViewportSizeSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, - DETBufferSizeInKByteForSwathCalculation); + st_vars->DETBufferSizeInKByteForSwathCalculation); #endif - if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } else { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } - if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > - 
DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { *ViewportSizeSupport = false; @@ -674,7 +636,7 @@ void dml32_CalculateSwathAndDETConfiguration( #endif DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { + } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); #endif @@ -692,11 +654,11 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesY[k]); + k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); + k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); @@ -907,9 +869,12 @@ void dml32_CalculateSwathWidth( } // CalculateSwathWidth bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, - unsigned int TotalNumberOfActiveDPP, - bool NoChroma, - enum output_encoder_class Output) + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) { bool ret_val = false; @@ -917,7 +882,20 @@ bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequest TotalNumberOfActiveDPP == 1 && NoChroma); if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) ret_val = false; - return ret_val; + + if (SurfaceTiling == dm_sw_linear) + ret_val = false; + + if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) + ret_val = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); +#endif + + return (ret_val); } void dml32_CalculateDETBufferSize( @@ -1686,17 +1664,22 @@ double dml32_RequiredDTBCLK( unsigned int AudioRate, unsigned int AudioLayout) { - double 
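The net effect of the dml32_UnboundedRequest() change is two new veto conditions joining the existing single-DPP, no-chroma requirement: linear surface tiling always disables unbounded requesting, and a comp-buf reservation that needs adjustment disables it when the new knob is set. A sketch of that decision (policy handling for the edp-only mode is elided; the function name and parameters here are illustrative):

#include <stdbool.h>

static bool unbounded_request_ok(bool single_dpp, bool no_chroma,
                                 bool linear_tiling,
                                 bool reserved_needs_adjustment,
                                 bool disable_if_adjustment_needed)
{
        bool ok = single_dpp && no_chroma;      /* pre-existing requirement */

        if (linear_tiling)                      /* new: never with dm_sw_linear */
                ok = false;
        if (reserved_needs_adjustment && disable_if_adjustment_needed)
                ok = false;                     /* new: honour the disable knob */
        return ok;
}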
PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); - double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * - dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * - dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; if (DSCEnable != true) return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; } @@ -1884,6 +1867,7 @@ void dml32_CalculateSurfaceSizeInMall( } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -1949,21 +1933,6 @@ void dml32_CalculateVMRowAndSwath( unsigned int BIGK_FRAGMENT_SIZE[]) { unsigned int k; - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (HostVMEnable == true) { @@ -1985,15 +1954,15 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && !IsVertical(myPipe[k].SourceRotation)) { - PTEBufferSizeInRequestsForLuma[k] = + st_vars->PTEBufferSizeInRequestsForLuma[k] = (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; + st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; } - PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 
myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2013,21 +1982,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForChroma[k], + st_vars->PTEBufferSizeInRequestsForChroma[k], myPipe[k].PitchC, myPipe[k].DCCMetaPitchC, myPipe[k].BlockWidthC, myPipe[k].BlockHeightC, /* Output */ - &MetaRowByteC[k], - &PixelPTEBytesPerRowC[k], + &st_vars->MetaRowByteC[k], + &st_vars->PixelPTEBytesPerRowC[k], &dpte_row_width_chroma_ub[k], &dpte_row_height_chroma[k], &dpte_row_height_linear_chroma[k], - &PixelPTEBytesPerRowC_one_row_per_frame[k], - &dpte_row_width_chroma_ub_one_row_per_frame[k], - &dpte_row_height_chroma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], + &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_chroma_one_row_per_frame[k], &meta_req_width_chroma[k], &meta_req_height_chroma[k], &meta_row_width_chroma[k], @@ -2055,19 +2024,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillC[k], &MaxNumSwathC[k]); } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; - PTEBufferSizeInRequestsForChroma[k] = 0; - PixelPTEBytesPerRowC[k] = 0; - PDEAndMetaPTEBytesFrameC = 0; - MetaRowByteC[k] = 0; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; + st_vars->PixelPTEBytesPerRowC[k] = 0; + st_vars->PDEAndMetaPTEBytesFrameC = 0; + st_vars->MetaRowByteC[k] = 0; MaxNumSwathC[k] = 0; PrefetchSourceLinesC[k] = 0; - dpte_row_height_chroma_one_row_per_frame[k] = 0; - dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; + st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; } - PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2087,21 +2056,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForLuma[k], + st_vars->PTEBufferSizeInRequestsForLuma[k], myPipe[k].PitchY, myPipe[k].DCCMetaPitchY, myPipe[k].BlockWidthY, myPipe[k].BlockHeightY, /* Output */ - &MetaRowByteY[k], - &PixelPTEBytesPerRowY[k], + &st_vars->MetaRowByteY[k], + &st_vars->PixelPTEBytesPerRowY[k], &dpte_row_width_luma_ub[k], &dpte_row_height_luma[k], &dpte_row_height_linear_luma[k], - &PixelPTEBytesPerRowY_one_row_per_frame[k], - &dpte_row_width_luma_ub_one_row_per_frame[k], - &dpte_row_height_luma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], + &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_luma_one_row_per_frame[k], &meta_req_width[k], &meta_req_height[k], &meta_row_width[k], @@ -2129,19 +2098,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillY[k], &MaxNumSwathY[k]); - PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; - MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; + PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; - if (PixelPTEBytesPerRowY[k] <= 64 * 
PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { + if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { PTEBufferSizeNotExceeded[k] = true; } else { PTEBufferSizeNotExceeded[k] = false; } - one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * - PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); + st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); } dml32_CalculateMALLUseForStaticScreen( @@ -2149,7 +2118,7 @@ void dml32_CalculateVMRowAndSwath( MALLAllocatedForDCN, UseMALLForStaticScreen, // mode SurfaceSizeInMALL, - one_row_per_frame_fits_in_buffer, + st_vars->one_row_per_frame_fits_in_buffer, /* Output */ UsesMALLForStaticScreen); // boolen @@ -2175,13 +2144,13 @@ void dml32_CalculateVMRowAndSwath( !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); if (use_one_row_for_frame[k]) { - dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; - dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; - dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; - dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; - PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; + dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; } if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) @@ -2189,7 +2158,7 @@ void dml32_CalculateVMRowAndSwath( else DCCMetaBufferSizeNotExceeded[k] = false; - PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; + PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; if (use_one_row_for_frame[k]) PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; @@ -2200,11 +2169,11 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].VRatioChroma, myPipe[k].DCCEnable, myPipe[k].HTotal / myPipe[k].PixelClock, - MetaRowByteY[k], MetaRowByteC[k], + st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], meta_row_height[k], meta_row_height_chroma[k], - PixelPTEBytesPerRowY[k], - PixelPTEBytesPerRowC[k], + st_vars->PixelPTEBytesPerRowY[k], + st_vars->PixelPTEBytesPerRowC[k], dpte_row_height_luma[k], dpte_row_height_chroma[k], @@ -2220,12 +2189,12 @@ void dml32_CalculateVMRowAndSwath( dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", __func__, k, 
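The PTE-buffer check the conversion touches is unchanged in substance: a row of PTEs fits when its byte count stays within 64 bytes per buffered request, and the one-row-per-frame layout gets a doubled budget. A sketch of the predicate (the names and example numbers are illustrative):

#include <stdbool.h>

static bool pte_row_fits(unsigned int pte_bytes_per_row,
                         unsigned int buffered_requests,
                         bool one_row_per_frame)
{
        unsigned int budget = 64 * buffered_requests;   /* 64 bytes per buffered request */

        if (one_row_per_frame)
                budget *= 2;                            /* doubled budget for this layout */
        return pte_bytes_per_row <= budget;
}

With an assumed depth of 64 requests, a 3072-byte row fits either way (3072 <= 4096).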
dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", __func__, k, dpte_row_height_chroma[k]); dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", __func__, k, dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", __func__, k, PTEBufferSizeNotExceeded[k]); @@ -3373,6 +3342,7 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -3436,45 +3406,18 @@ bool dml32_CalculatePrefetchSchedule( double *VReadyOffsetPix) { bool MyError = false; - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler; - double LineTime; - double dst_y_prefetch_equ; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tvm_oto_lines; - double Tr0_oto_lines; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE = 0; - double TimeForFetchingRowInVBlank = 0; - double LinesToRequestPrefetchPixelData = 0; - unsigned int HostVMDynamicLevelsTrips; - double trip_to_mem; - double Tvm_trips; - double Tr0_trips; - double Tvm_trips_rounded; - double Tr0_trips_rounded; - double Lsw_oto; - double Tpre_rounded; - double prefetch_bw_equ; - double Tvm_equ; - double Tr0_equ; - double Tdmbf; - double Tdmec; - double Tdmsks; - double prefetch_sw_bytes; - double bytes_pp; - double dep_bytes; - unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; - double min_Lsw; - double Tsw_est1 = 0; - double Tsw_est3 = 0; + + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; + st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; + st_vars->Tsw_est1 = 0; + st_vars->Tsw_est3 = 0; if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; else - HostVMDynamicLevelsTrips = 0; + st_vars->HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); @@ -3497,19 +3440,19 @@ bool dml32_CalculatePrefetchSchedule( TSetup, /* output */ - &Tdmbf, - &Tdmec, - &Tdmsks, + &st_vars->Tdmbf, + &st_vars->Tdmec, + &st_vars->Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); - LineTime = myPipe->HTotal / myPipe->PixelClock; - trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; + st_vars->trip_to_mem = UrgentLatency; + st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); if (DynamicMetadataVMEnabled == true) - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; else *Tdmdl 
= TWait + UrgentExtraLatency; @@ -3519,15 +3462,15 @@ bool dml32_CalculatePrefetchSchedule( #endif if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", - __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", - __func__, Tdmsks); + __func__, st_vars->Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); #endif @@ -3539,21 +3482,21 @@ bool dml32_CalculatePrefetchSchedule( } *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * + *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * myPipe->PixelClock / myPipe->Dispclk + DSCDelay; *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) @@ -3563,10 +3506,10 @@ bool dml32_CalculatePrefetchSchedule( + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? 
myPipe->HActive * 3 / 4 : 0); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); @@ -3579,9 +3522,9 @@ bool dml32_CalculatePrefetchSchedule( else *DSTYAfterScaler = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); @@ -3589,132 +3532,132 @@ bool dml32_CalculatePrefetchSchedule( MyError = false; - Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); if (GPUVMEnable == true) { - Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * + (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { - Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / - 4.0 * LineTime; // VBA_ERROR + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / + 4.0 * st_vars->LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; } else { *Tno_bw = 0; } } else if (myPipe->DCCEnable == true) { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; *Tno_bw = 0; } else { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = LineTime / 2.0; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; *Tno_bw = 0; } - Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); - Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); + st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, 
st_vars->LineTime / 4.0); + st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; } else { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; } - prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, - prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); - min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; - min_Lsw = dml_max(min_Lsw, 1.0); - Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; + st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; + st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); + st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; if (GPUVMEnable == true) { - Tvm_oto = dml_max3( - Tvm_trips, - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, - LineTime / 4.0); + st_vars->Tvm_oto = dml_max3( + st_vars->Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, + st_vars->LineTime / 4.0); } else - Tvm_oto = LineTime / 4.0; + st_vars->Tvm_oto = st_vars->LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_oto = dml_max4( - Tr0_trips, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, - (LineTime - Tvm_oto)/2.0, - LineTime / 4.0); + st_vars->Tr0_oto = dml_max4( + st_vars->Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, + (st_vars->LineTime - st_vars->Tvm_oto)/2.0, + st_vars->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); #endif } else - Tr0_oto = (LineTime - Tvm_oto) / 2.0; + st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; - Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; - Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; - dst_y_prefetch_oto = Tvm_oto_lines + 2 * 
Tr0_oto_lines + Lsw_oto; + st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; + st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; + st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; - dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - + st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); - dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); + dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); - dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); - dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); - dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); - dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); - dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); + 
dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); #endif - dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; - Tpre_rounded = dst_y_prefetch_equ * LineTime; + st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; + st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); - dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", - __func__, VStartup * LineTime); + __func__, VStartup * st_vars->LineTime); dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", __func__, *DSTYAfterScaler); #endif - dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); - if (prefetch_sw_bytes < dep_bytes) - prefetch_sw_bytes = 2 * dep_bytes; + if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) + st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -3722,61 +3665,61 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1) { + if (st_vars->dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; - if (Tpre_rounded - *Tno_bw > 0) { + if (st_vars->Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); - Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); + st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; } else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) - && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) + && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) 
{ PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / + (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); else PrefetchBandwidth2 = 0; - if (Tpre_rounded - Tvm_trips_rounded > 0) { + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); - Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); + st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; } else PrefetchBandwidth3 = 0; if (VStartup == MaxVStartup && - (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * - LineTime - Tvm_trips_rounded > 0) { + (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * + st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); } - if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { - PrefetchBandwidth4 = prefetch_sw_bytes / - (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / + (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); } else { PrefetchBandwidth4 = 0; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); - dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); - dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); @@ -3789,9 +3732,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 
HostVMInefficiencyFactor) - / PrefetchBandwidth1 >= Tr0_trips_rounded) { + / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { Case1OK = true; } else { Case1OK = false; @@ -3802,9 +3745,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth2 < Tr0_trips_rounded) { + / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; @@ -3815,9 +3758,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < - Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= - Tr0_trips_rounded) { + st_vars->Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; @@ -3827,80 +3770,80 @@ bool dml32_CalculatePrefetchSchedule( } if (Case1OK) - prefetch_bw_equ = PrefetchBandwidth1; + st_vars->prefetch_bw_equ = PrefetchBandwidth1; else if (Case2OK) - prefetch_bw_equ = PrefetchBandwidth2; + st_vars->prefetch_bw_equ = PrefetchBandwidth2; else if (Case3OK) - prefetch_bw_equ = PrefetchBandwidth3; + st_vars->prefetch_bw_equ = PrefetchBandwidth3; else - prefetch_bw_equ = PrefetchBandwidth4; + st_vars->prefetch_bw_equ = PrefetchBandwidth4; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); #endif - if (prefetch_bw_equ > 0) { + if (st_vars->prefetch_bw_equ > 0) { if (GPUVMEnable == true) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * - HostVMInefficiencyFactor / prefetch_bw_equ, - Tvm_trips, LineTime / 4); + st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, + st_vars->Tvm_trips, st_vars->LineTime / 4); } else { - Tvm_equ = LineTime / 4; + st_vars->Tvm_equ = st_vars->LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * - HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, - (LineTime - Tvm_equ) / 2, LineTime / 4); + st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, + (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); } else { - Tr0_equ = (LineTime - Tvm_equ) / 2; + st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; } } else { - Tvm_equ = 0; - Tr0_equ = 0; + st_vars->Tvm_equ = 0; + st_vars->Tr0_equ = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); #endif } } - if (dst_y_prefetch_oto < dst_y_prefetch_equ) { - *DestinationLinesForPrefetch = dst_y_prefetch_oto; - TimeForFetchingMetaPTE = Tvm_oto; - TimeForFetchingRowInVBlank = Tr0_oto; - *PrefetchBandwidth = prefetch_bw_oto; + if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; + *PrefetchBandwidth = st_vars->prefetch_bw_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; + *PrefetchBandwidth = st_vars->prefetch_bw_equ; } - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = - dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; - LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - + st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); #endif - if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { - *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); @@ -3908,12 +3851,12 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / - LinesToRequestPrefetchPixelData, + 
st_vars->LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { @@ -3927,7 +3870,7 @@ bool dml32_CalculatePrefetchSchedule( #endif } - *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ @@ -3936,11 +3879,11 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max(*VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { @@ -3955,25 +3898,25 @@ bool dml32_CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub - / LineTime; + / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / - LinesToRequestPrefetchPixelData + st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC - * swath_width_chroma_ub / LineTime; + * swath_width_chroma_ub / st_vars->LineTime; } else { MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", - __func__, LinesToRequestPrefetchPixelData); + __func__, st_vars->LinesToRequestPrefetchPixelData); #endif *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; @@ -3982,15 +3925,15 @@ bool dml32_CalculatePrefetchSchedule( } #ifdef __DML_VBA_DEBUG__ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", - (double)LinesToRequestPrefetchPixelData * LineTime + - 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + + 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - - TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - + st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); #endif @@ -3998,7 +3941,7 @@ bool dml32_CalculatePrefetchSchedule( MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", - __func__, dst_y_prefetch_equ); + __func__, st_vars->dst_y_prefetch_equ); #endif } @@ -4014,10 +3957,10 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); #endif prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / - (*DestinationLinesToRequestVMInVBlank * LineTime); + (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -4034,7 +3977,7 @@ bool dml32_CalculatePrefetchSchedule( prefetch_row_bw = 0; } else if (*DestinationLinesToRequestRowInVBlank > 0) { prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / - (*DestinationLinesToRequestRowInVBlank * LineTime); + (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); @@ -4057,12 +4000,12 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; 
*DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; @@ -4216,6 +4159,7 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, @@ -4277,37 +4221,15 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( double ActiveDRAMClockChangeLatencyMargin[]) { unsigned int i, j, k; - unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; - unsigned int DRAMClockChangeSupportNumber = 0; - unsigned int LastSurfaceWithoutMargin; - unsigned int DRAMClockChangeMethod = 0; - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; - double MinActiveFCLKChangeMargin = 0.; - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; - double ActiveClockChangeLatencyHidingY; - double ActiveClockChangeLatencyHidingC; - double ActiveClockChangeLatencyHiding; - double EffectiveDETBufferSizeY; - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; - double TotalPixelBW = 0.0; - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double LinesInDETY[DC__NUM_DPP__MAX]; - double LinesInDETC[DC__NUM_DPP__MAX]; - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; - double FullDETBufferingTimeY; - double FullDETBufferingTimeC; - double WritebackDRAMClockChangeLatencyMargin; - double WritebackFCLKChangeLatencyMargin; - double WritebackLatencyHiding; - bool SameTimingForFCLKChange; - - unsigned int TotalActiveWriteback = 0; - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; + + st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; + st_vars->DRAMClockChangeSupportNumber = 0; + st_vars->DRAMClockChangeMethod = 0; + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + st_vars->MinActiveFCLKChangeMargin = 0.; + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + st_vars->TotalPixelBW = 0.0; + st_vars->TotalActiveWriteback = 0; Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency @@ -4339,13 +4261,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif - TotalActiveWriteback = 0; + st_vars->TotalActiveWriteback = 0; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (WritebackEnable[k] == true) - TotalActiveWriteback = TotalActiveWriteback + 1; + st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; } - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency @@ -4355,7 +4277,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; - if 
(TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency @@ -4385,14 +4307,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + + st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); + st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ @@ -4403,72 +4325,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); - EffectiveDETBufferSizeY = DETBufferSizeY[k]; + st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { - EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; } - LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; - LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); + st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; - ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if 
(NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; - LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) + st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); + st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; - ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; } - ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, - ActiveClockChangeLatencyHidingC); + st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, + st_vars->ActiveClockChangeLatencyHidingC); } else { - ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; + st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 + st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); if (WritebackPixelFormat[k] == dm_444_64) - WritebackLatencyHiding = WritebackLatencyHiding / 2; + st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; - WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackDRAMClockChangeWatermark; - WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], - WritebackFCLKChangeLatencyMargin); - 
ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], - WritebackDRAMClockChangeLatencyMargin); + st_vars->WritebackFCLKChangeLatencyMargin); + st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], + st_vars->WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? @@ -4487,41 +4409,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (DRRDisplay[i] || DRRDisplay[j]))) { - SynchronizedSurfaces[i][j] = true; + st_vars->SynchronizedSurfaces[i][j] = true; } else { - SynchronizedSurfaces[i][j] = false; + st_vars->SynchronizedSurfaces[i][j] = false; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || - ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { - FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; - MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; - SurfaceWithMinActiveFCLKChangeMargin = k; + (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; + st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; } } - *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; - SameTimingForFCLKChange = true; + st_vars->SameTimingForFCLKChange = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { + if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (SameTimingForFCLKChange || - ActiveFCLKChangeLatencyMargin[k] < - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; + (st_vars->SameTimingForFCLKChange || + st_vars->ActiveFCLKChangeLatencyMargin[k] < + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; } - SameTimingForFCLKChange = false; + st_vars->SameTimingForFCLKChange = false; } } - if (MinActiveFCLKChangeMargin > 0) { + if (st_vars->MinActiveFCLKChangeMargin > 0) { *FCLKChangeSupport = dm_fclock_change_vactive; - } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && (PrefetchMode <= 1)) { *FCLKChangeSupport = dm_fclock_change_vblank; } else { @@ -4531,7 +4453,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *USRRetrainingSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (USRRetrainingLatencyMargin[k] < 0)) { + (st_vars->USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = 
false; } } @@ -4542,42 +4464,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { - DRAMClockChangeSupportNumber = 2; - } else if (DRAMClockChangeSupportNumber == 0) { - DRAMClockChangeSupportNumber = 1; - LastSurfaceWithoutMargin = k; - } else if (DRAMClockChangeSupportNumber == 1 && - !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { - DRAMClockChangeSupportNumber = 2; + st_vars->DRAMClockChangeSupportNumber = 2; + } else if (st_vars->DRAMClockChangeSupportNumber == 0) { + st_vars->DRAMClockChangeSupportNumber = 1; + st_vars->LastSurfaceWithoutMargin = k; + } else if (st_vars->DRAMClockChangeSupportNumber == 1 && + !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { + st_vars->DRAMClockChangeSupportNumber = 2; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - DRAMClockChangeMethod = 1; + st_vars->DRAMClockChangeMethod = 1; else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - DRAMClockChangeMethod = 2; + st_vars->DRAMClockChangeMethod = 2; } - if (DRAMClockChangeMethod == 0) { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeMethod == 0) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } else if (DRAMClockChangeMethod == 1) { - if (DRAMClockChangeSupportNumber == 0) + } else if (st_vars->DRAMClockChangeMethod == 1) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } else { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; @@ -4591,7 +4513,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ @@ -4599,7 +4521,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%d, SwathHeightY = 
%d\n", __func__, k, SwathHeightY[k]); -dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); @@ -4610,7 +4532,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l if (BytePerPixelDETC[k] > 0) { src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 72461b934ee0..37a314ce284b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,6 +30,7 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" +#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -81,6 +82,7 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( double *DPPCLKUsingSingleDPP); void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -90,6 +92,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -137,6 +142,8 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport); @@ -181,7 +188,10 @@ void dml32_CalculateSwathWidth( bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, unsigned int TotalNumberOfActiveDPP, bool NoChroma, - enum output_encoder_class Output); + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); void dml32_CalculateDETBufferSize( unsigned int DETSizeOverride[], @@ -352,6 +362,7 @@ void dml32_CalculateSurfaceSizeInMall( bool *ExceededMALLSize); void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -704,6 +715,7 @@ 
double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -799,6 +811,7 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c new file mode 100644 index 000000000000..84b4b00f29cb --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -0,0 +1,684 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "clk_mgr.h" +#include "resource.h" +#include "dcn321_fpu.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" + +#define DCN3_2_DEFAULT_DET_SIZE 256 + +struct _vcs_dpi_ip_params_st dcn3_21_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 1600.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 12.36, + .sr_enter_plus_exit_time_us = 16.72, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + 
.pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; 
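+	/*
+	 * Worked example using the dcn3_21_soc defaults above (a hypothetical
+	 * state, not a real SKU): for dcfclk = 1564 MHz, fabricclk = 400 MHz
+	 * and dram_speed = 1600 MT/s,
+	 *   sdp    = 1564 * 64 * 1.00    = 100096
+	 *   fabric =  400 * 64 * 0.67    =  17152
+	 *   dram   = 1600 * 8 * 2 * 0.20 =   5120
+	 * so this state is DRAM-limited and 5120 is returned.
+	 */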
+ + return limiting_bw_kbytes_sec; +} + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + get_optimal_ntuple(entry); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = 
dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
+	entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
+
+	// Insert all the DCFCLK STAs
+	for (i = 0; i < num_dcfclk_stas; i++) {
+		entry.dcfclk_mhz = dcfclk_sta_targets[i];
+		entry.fabricclk_mhz = 0;
+		entry.dram_speed_mts = 0;
+
+		dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+	}
+
+	// Insert the max DCFCLK
+	entry.dcfclk_mhz = max_dcfclk_mhz;
+	entry.fabricclk_mhz = 0;
+	entry.dram_speed_mts = 0;
+
+	dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+
+	// Insert the UCLK DPMs
+	for (i = 0; i < num_uclk_dpms; i++) {
+		entry.dcfclk_mhz = 0;
+		entry.fabricclk_mhz = 0;
+		entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+
+		dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+	}
+
+	// If FCLK is coarse grained, insert individual DPMs.
+	if (num_fclk_dpms > 2) {
+		for (i = 0; i < num_fclk_dpms; i++) {
+			entry.dcfclk_mhz = 0;
+			entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
+			entry.dram_speed_mts = 0;
+
+			dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+		}
+	}
+	// If FCLK is fine grained, only insert the max.
+	else {
+		entry.dcfclk_mhz = 0;
+		entry.fabricclk_mhz = max_fclk_mhz;
+		entry.dram_speed_mts = 0;
+
+		dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
+	}
+
+	// At this point, the table contains all "points of interest" based on
+	// DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
+	// ratios (by derate) are exact.
+
+	// Remove states that require higher clocks than are supported
+	for (i = *num_entries - 1; i >= 0 ; i--) {
+		if (table[i].dcfclk_mhz > max_dcfclk_mhz ||
+				table[i].fabricclk_mhz > max_fclk_mhz ||
+				table[i].dram_speed_mts > max_uclk_mhz * 16)
+			remove_entry_from_table_at_index(table, num_entries, i);
+	}
+
+	// At this point, the table only contains supported points of interest;
+	// it could be used as is, but some states may be redundant due to the
+	// coarse grained nature of some clocks, so we want to round up to
+	// coarse grained DPMs and remove duplicates.
+
+	// Round up UCLKs
+	for (i = *num_entries - 1; i >= 0 ; i--) {
+		for (j = 0; j < num_uclk_dpms; j++) {
+			if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
+				table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16;
+				break;
+			}
+		}
+	}
+
+	// If FCLK is coarse grained, round up to next DPMs
+	if (num_fclk_dpms > 2) {
+		for (i = *num_entries - 1; i >= 0 ; i--) {
+			for (j = 0; j < num_fclk_dpms; j++) {
+				if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
+					table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz;
+					break;
+				}
+			}
+		}
+	}
+	// Otherwise, round up to minimum.
+	else {
+		for (i = *num_entries - 1; i >= 0 ; i--) {
+			if (table[i].fabricclk_mhz < min_fclk_mhz) {
+				table[i].fabricclk_mhz = min_fclk_mhz;
+				break;
+			}
+		}
+	}
+
+	// Round DCFCLKs up to minimum
+	for (i = *num_entries - 1; i >= 0 ; i--) {
+		if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
+			table[i].dcfclk_mhz = min_dcfclk_mhz;
+			break;
+		}
+	}
+
+	// Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
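+	/*
+	 * Illustrative example of the dedup below (hypothetical values, not
+	 * from a real SKU): rounding up to coarse grained DPMs can map two
+	 * neighbouring states, e.g. a 615 MHz DCFCLK STA and a UCLK DPM that
+	 * both round to {dcfclk 615, fclk 615, uclk 1600 MT/s}, onto the same
+	 * tuple. Since the table is sorted, such duplicates sit next to each
+	 * other, so one pass that only advances i when nothing was removed is
+	 * enough to drop them all.
+	 */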
+	i = 0;
+	while (i < *num_entries - 1) {
+		if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
+				table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
+				table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
+			remove_entry_from_table_at_index(table, num_entries, i + 1);
+		else
+			i++;
+	}
+
+	// Fix up the state indices
+	for (i = *num_entries - 1; i >= 0 ; i--) {
+		table[i].state = i;
+	}
+
+	return 0;
+}
+
+static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
+		unsigned int *optimal_dcfclk,
+		unsigned int *optimal_fclk)
+{
+	double bw_from_dram, bw_from_dram1, bw_from_dram2;
+
+	bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans *
+		dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100);
+	bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans *
+		dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100);
+
+	bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2;
+
+	if (optimal_fclk)
+		*optimal_fclk = bw_from_dram /
+			(dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+
+	if (optimal_dcfclk)
+		*optimal_dcfclk = bw_from_dram /
+			(dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100));
+}
+
+/** dcn321_update_bw_bounding_box_fpu
+ * This overrides some of the dcn3_2 ip_or_soc initial parameters hardcoded from the spreadsheet
+ * with actual values as per dGPU SKU:
+ * - with a few options passed in from dc->config
+ * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to come from PM FW)
+ * - with latency values (in ns units) passed via dc->bb_overrides for debugging purposes
+ * - with latencies from VBIOS (in 100ns units) if available for a certain dGPU SKU
+ * - with the number of DRAM channels from VBIOS (which differs for certain dGPU SKUs of the same ASIC)
+ * - clock levels with clk_table entries passed from Clk Mgr as reported by PM FW for the different
+ *   clocks (which might differ for certain dGPU SKUs of the same ASIC)
+ */
+void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
+{
+	dc_assert_fp_enabled();
+	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
+		/* Overrides from dc->config options */
+		dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
+
+		/* Override from passed dc->bb_overrides if available */
+		if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
+				&& dc->bb_overrides.sr_exit_time_ns) {
+			dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+		}
+
+		if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000)
+				!= dc->bb_overrides.sr_enter_plus_exit_time_ns
+				&& dc->bb_overrides.sr_enter_plus_exit_time_ns) {
+			dcn3_21_soc.sr_enter_plus_exit_time_us =
+				dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+		}
+
+		if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns
+				&& dc->bb_overrides.urgent_latency_ns) {
+			dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0;
+		}
+
+		if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000)
+				!= dc->bb_overrides.dram_clock_change_latency_ns
+				&& dc->bb_overrides.dram_clock_change_latency_ns) {
+			dcn3_21_soc.dram_clock_change_latency_us =
+				dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+		}
+
+		if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000)
+				!= dc->bb_overrides.dummy_clock_change_latency_ns
+				&&
dc->bb_overrides.dummy_clock_change_latency_ns) {
+			dcn3_21_soc.dummy_pstate_latency_us =
+				dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0;
+		}
+
+		/* Override from VBIOS if VBIOS bb_info available */
+		if (dc->ctx->dc_bios->funcs->get_soc_bb_info) {
+			struct bp_soc_bb_info bb_info = {0};
+
+			if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) {
+				if (bb_info.dram_clock_change_latency_100ns > 0)
+					dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10;
+
+				if (bb_info.dram_sr_enter_exit_latency_100ns > 0)
+					dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10;
+
+				if (bb_info.dram_sr_exit_latency_100ns > 0)
+					dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10;
+			}
+		}
+
+		/* Override from VBIOS for num_chan */
+		if (dc->ctx->dc_bios->vram_info.num_chans)
+			dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans;
+
+		if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
+			dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+
+	}
+
+	/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
+	dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
+
+	/* Override clock levels from Clk Mgr table entries as reported by PM FW */
+	if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) {
+		if (dc->debug.use_legacy_soc_bb_mechanism) {
+			unsigned int i = 0, j = 0, num_states = 0;
+
+			unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0};
+			unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0};
+			unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0};
+			unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0};
+
+			unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564};
+			unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0;
+			unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0;
+
+			for (i = 0; i < MAX_NUM_DPM_LVL; i++) {
+				if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
+					max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz;
+				if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
+					max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz;
+				if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
+					max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz;
+				if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
+					max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
+			}
+			if (!max_dcfclk_mhz)
+				max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz;
+			if (!max_dispclk_mhz)
+				max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz;
+			if (!max_dppclk_mhz)
+				max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz;
+			if (!max_phyclk_mhz)
+				max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
+
+			if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+				// If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
+				dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
+				num_dcfclk_sta_targets++;
+			} else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) {
+				// If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates
+				for (i = 0; i < num_dcfclk_sta_targets; i++) {
+					if
(dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_21_soc.num_states = num_states; + for (i = 0; i < dcn3_21_soc.num_states; i++) { + dcn3_21_soc.clock_limits[i].state = i; + dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks 
cannot come from bw_params, always fill from dcn3_21_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h new file mode 100644 index 000000000000..e8fad9b4be69 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "dml/display_mode_vba.h" + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + +void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 87c9b9f9976e..e8b094006d95 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -499,6 +499,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int refresh_rate; bool synchronize_timings; unsigned int odm_combine_policy; + bool drr_display; }; struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 39f93072b5e0..503e7d984ff0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -111,6 +111,7 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc); dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth); dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip); + dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte); dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte); @@ -251,7 +252,7 @@ unsigned int get_total_surface_size_in_mall_bytes( return size; } -unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx) +static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx) { int pipe_idx = -1; int i; @@ -697,6 +698,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz; + mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display; if (ip->is_line_buffer_bpp_fixed) mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = ip->line_buffer_fixed_bpp; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 47b149d4bfcf..8460aefe7b6d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -165,7 +165,6 @@ unsigned int get_total_surface_size_in_mall_bytes( struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, unsigned int num_pipes); -unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx); bool get_is_phantom_pipe(struct display_mode_lib *mode_lib, const display_e2e_pipe_params_st *pipes, @@ -183,6 +182,108 @@ void Calculate256BBlockSizes( unsigned int *BlockWidth256BytesY, unsigned int *BlockWidth256BytesC); +struct dml32_CalculateSwathAndDETConfiguration { + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; + unsigned int 
RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpSwathSizeBytesY; + unsigned int RoundedUpSwathSizeBytesC; + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; + unsigned int TotalActiveDPP; + bool NoChromaSurfaces; + unsigned int DETBufferSizeInKByteForSwathCalculation; +}; + +struct dml32_CalculateVMRowAndSwath { + unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; + unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; + unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; + bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; +}; + +struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { + unsigned int SurfaceWithMinActiveFCLKChangeMargin; + unsigned int DRAMClockChangeSupportNumber; + unsigned int LastSurfaceWithoutMargin; + unsigned int DRAMClockChangeMethod; + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin; + double MinActiveFCLKChangeMargin; + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + double EffectiveDETBufferSizeY; + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; + double TotalPixelBW; + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC[DC__NUM_DPP__MAX]; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + bool SameTimingForFCLKChange; + unsigned int TotalActiveWriteback; + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; +}; + +struct dml32_CalculatePrefetchSchedule { + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE; + double TimeForFetchingRowInVBlank; + double LinesToRequestPrefetchPixelData; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + unsigned int 
max_vratio_pre; + double min_Lsw; + double Tsw_est1; + double Tsw_est3; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -197,6 +298,13 @@ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCal unsigned int ReorderBytes; unsigned int VMDataOnlyReturnBW; double HostVMInefficiencyFactor; + DmlPipe myPipe; + SOCParametersList mmSOCParameters; + double dummy_unit_vector[DC__NUM_DPP__MAX]; + double dummy_single[2]; + enum clock_change_support dummy_dramchange_support; + enum dm_fclock_change_support dummy_fclkchange_support; + bool dummy_USRRetrainingSupport; }; struct dml32_ModeSupportAndSystemConfigurationFull { @@ -212,12 +320,45 @@ struct dml32_ModeSupportAndSystemConfigurationFull { double DSTXAfterScaler[DC__NUM_DPP__MAX]; double MaxTotalVActiveRDBandwidth; bool dummy_boolean_array[2][DC__NUM_DPP__MAX]; + enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; + DmlPipe myPipe; + unsigned int dummy_integer[4]; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + unsigned int TotalSlots; + unsigned int NumberOfDPPDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NextPrefetchModeState; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool MPCCombineMethodAsPossible; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool NoChroma; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + enum odm_combine_mode ODMModeNoDSC; + enum odm_combine_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor; + bool dummy_boolean[2]; }; struct dummy_vars { struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; + struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; + struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; + struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; }; struct vba_vars_st { diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index e2b3a2c7a927..8f8ac8e29ed0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -160,8 +160,11 @@ struct calc_pll_clock_source { struct clock_source_funcs { bool (*cs_power_down)( struct clock_source *); - bool (*program_pix_clk)(struct clock_source *, - struct pixel_clk_params *, struct pll_settings *); + bool (*program_pix_clk)( + struct clock_source *, + struct pixel_clk_params *, + enum dp_link_encoding encoding, + struct pll_settings *); uint32_t (*get_pix_clk_dividers)( struct clock_source *, struct pixel_clk_params *, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h 
b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index e4b4102b1538..b3d0a4ea2446 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -96,6 +96,7 @@ struct resource_funcs { struct panel_cntl*(*panel_cntl_create)( const struct panel_cntl_init_data *panel_cntl_init_data); struct link_encoder *(*link_enc_create)( + struct dc_context *ctx, const struct encoder_init_data *init); /* Create a minimal link encoder object with no dc_link object * associated with it. */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index c2d116cce119..ce006762f257 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -101,7 +101,8 @@ struct dccg_funcs { void (*set_dpstreamclk)( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void (*enable_symclk32_se)( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 906818e792dd..44c4578193a3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -198,7 +198,7 @@ struct hubp_funcs { void (*hubp_soft_reset)(struct hubp *hubp, bool reset); void (*hubp_update_force_pstate_disallow)(struct hubp *hubp, bool allow); - void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel); + void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void (*hubp_prepare_subvp_buffering)(struct hubp *hubp, bool enable); void (*hubp_set_flip_int)(struct hubp *hubp); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 456dbe9f2264..42afa1952890 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -252,6 +252,8 @@ struct stream_encoder_funcs { void (*set_input_mode)( struct stream_encoder *enc, unsigned int pix_per_container); + void (*enable_fifo)(struct stream_encoder *enc); + void (*disable_fifo)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index 146cd1819912..2aa74ee1502a 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -289,6 +289,13 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vblank_irq_info_funcs\ } +#define dmub_trace_int_entry()\ + [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ + IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ + DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ + .funcs = &dmub_trace_irq_info_funcs\ + } + #define vline0_int_entry(reg_num)\ [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -297,13 +304,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = { .funcs = &vline0_irq_info_funcs\ } -#define dmub_trace_int_entry()\ - [DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\ - IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\ - DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\ - .funcs = &dmub_trace_irq_info_funcs\ - } - #define dummy_irq_entry() \ {\ .funcs = &dummy_irq_info_funcs\ diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c index 66e60762388e..1d149d290147 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c @@ -24,6 +24,10 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi return DC_IRQ_SOURCE_VBLANK1; case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP: return DC_IRQ_SOURCE_VBLANK2; + case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC1_VLINE0; + case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL: + return DC_IRQ_SOURCE_DC2_VLINE0; case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT: return DC_IRQ_SOURCE_PFLIP1; case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT: @@ -96,6 +100,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vline0_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg @@ -164,6 +173,14 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = &vblank_irq_info_funcs\ } +#define vline0_int_entry(reg_num)\ + [DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\ + OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\ + .funcs = &vline0_irq_info_funcs\ + } + #define dummy_irq_entry() { .funcs = &dummy_irq_info_funcs } #define i2c_int_entry(reg_num) \ @@ -236,6 +253,8 @@ static const struct irq_source_info irq_source_info_dcn303[DAL_IRQ_SOURCES_NUMBE vupdate_no_lock_int_entry(1), vblank_int_entry(0), vblank_int_entry(1), + vline0_int_entry(0), + vline0_int_entry(1), }; static const struct irq_service_funcs irq_service_funcs_dcn303 = { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 776e822abcbb..5e92019539c8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -40,17 +40,24 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; link_enc->funcs->connect_dig_be_to_fe(link_enc, pipe_ctx->stream_res.stream_enc->id, true); if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE); + if (stream_enc->funcs->enable_fifo) + stream_enc->funcs->enable_fifo(stream_enc); } void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + + if (stream_enc && stream_enc->funcs->disable_fifo) + stream_enc->funcs->disable_fifo(stream_enc); link_enc->funcs->connect_dig_be_to_fe( link_enc, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index ea6cf8bfce30..db7b0b155374 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -116,7 +116,7 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = 
dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst); + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, link_enc->inst); dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); stream_enc->funcs->enable_stream(stream_enc); @@ -137,7 +137,7 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) stream_enc->funcs->disable(stream_enc); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst); - dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst); + dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, pipe_ctx->link_res.hpo_dp_link_enc->inst); } static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index de193636d022..d7f3619352f0 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -976,7 +976,8 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 { uint16_t vtotal; uint8_t main_pipe_index; uint8_t phantom_pipe_index; - uint8_t padding[2]; + uint8_t is_drr; + uint8_t padding; } subvp_data; struct { @@ -1579,6 +1580,12 @@ enum dmub_cmd_fams_type { DMUB_CMD__FAMS_SETUP_FW_CTRL = 0, DMUB_CMD__FAMS_DRR_UPDATE = 1, DMUB_CMD__HANDLE_SUBVP_CMD = 2, // specifically for SubVP cmd + /** + * For SubVP set manual trigger in FW because it + * triggers DRR_UPDATE_PENDING which SubVP relies + * on (for any SubVP cases that use a DRR display) + */ + DMUB_CMD__FAMS_SET_MANUAL_TRIGGER = 3, }; /** diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1db21d13726d..f175e65b853a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -249,6 +249,7 @@ enum DC_DEBUG_MASK { DC_DISABLE_CLOCK_GATING = 0x8, DC_DISABLE_PSR = 0x10, DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, + DC_DISABLE_MPO = 0x40, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h new file mode 100644 index 000000000000..b798cf5a2c39 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_8_10_0_OFFSET_HEADER +#define _umc_8_10_0_OFFSET_HEADER + +#define regUMCCH0_0_GeccErrCntSel 0x0328 +#define regUMCCH0_0_GeccErrCntSel_BASE_IDX 2 +#define regUMCCH0_0_GeccErrCnt 0x0329 +#define regUMCCH0_0_GeccErrCnt_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 2 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h new file mode 100644 index 000000000000..bd99b431247f --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_8_10_0_SH_MASK_HEADER +#define _umc_8_10_0_SH_MASK_HEADER + +//UMCCH0_0_GeccErrCntSel +#define UMCCH0_0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_0_GeccErrCnt +#define UMCCH0_0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L +//MCA_UMC_UMC0_MCUMC_STATUST0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb__SHIFT 0x18 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30__SHIFT 0x1e +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22_MASK 0x0000000000C00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb_MASK 0x000000003F000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30_MASK 0x00000000C0000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 0x0080000000000000L +#define 
MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L +//MCA_UMC_UMC0_MCUMC_ADDRT0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL + +#endif diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 1f9df4e7509b..15943bc21bc5 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -3255,8 +3255,8 @@ ucMaxNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of t ucMinNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all. -usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. -usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. 
*/ diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 524fb09437e5..65624d091ed2 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -45,6 +45,13 @@ enum amdgpu_int_thermal_type { THERMAL_TYPE_KV, }; +enum amdgpu_runpm_mode { + AMDGPU_RUNPM_NONE, + AMDGPU_RUNPM_PX, + AMDGPU_RUNPM_BOCO, + AMDGPU_RUNPM_BACO, +}; + struct amdgpu_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -355,6 +362,8 @@ struct amdgpu_pm { struct amdgpu_ctx *stable_pstate_ctx; struct config_table_setting config_table; + /* runtime mode */ + enum amdgpu_runpm_mode rpm_mode; }; int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fd79b213fab4..6d9b3c6af164 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1415,13 +1415,6 @@ static int smu_disable_dpms(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): - if (!(adev->in_runpm || amdgpu_in_reset(adev))) { - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - if (ret) { - dev_err(adev->dev, "Fail set mp1 state to UNLOAD!\n"); - return ret; - } - } return 0; default: break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 5becfc1bb2ec..2b672d102c96 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -973,8 +973,8 @@ typedef struct { uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot. uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold. uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin - uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot - uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold + uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot + uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for. 
uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 132da684e379..25c08f963f49 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,10 +25,10 @@
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION 0x2A
+#define SMU13_DRIVER_IF_VERSION 0x2C
 //Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x1E
+#define PPTABLE_VERSION 0x20
 #define NUM_GFXCLK_DPM_LEVELS 16
 #define NUM_SOCCLK_DPM_LEVELS 8
@@ -152,6 +152,7 @@ typedef enum {
 #define DEBUG_OVERRIDE_DISABLE_DFLL 0x00000200
 #define DEBUG_OVERRIDE_ENABLE_RLC_VF_BRINGUP_MODE 0x00000400
 #define DEBUG_OVERRIDE_DFLL_MASTER_MODE 0x00000800
+#define DEBUG_OVERRIDE_ENABLE_PROFILING_MODE 0x00001000
 // VR Mapping Bit Defines
 #define VR_MAPPING_VR_SELECT_MASK 0x01
@@ -1014,8 +1015,8 @@ typedef struct {
 uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot.
 uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold.
 uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin
-uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot
-uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold
+uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot
+uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold
 //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for.
 uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT];
@@ -1081,11 +1082,15 @@ typedef struct {
 uint16_t GfxclkFreqGfxUlv; // in MHz
 uint8_t GfxIdlePadding2[2];
-
-uint32_t GfxoffSpare[16];
+uint32_t GfxOffEntryHysteresis; //For RLC to count after it enters CGCG, and before it triggers GFXOFF entry
+uint32_t GfxoffSpare[15];
 // GFX GPO
-uint32_t GfxGpoSpare[16];
+float DfllBtcMasterScalerM;
+int32_t DfllBtcMasterScalerB;
+float DfllBtcSlaveScalerM;
+int32_t DfllBtcSlaveScalerB;
+uint32_t GfxGpoSpare[12];
 // GFX DCS
@@ -1326,8 +1331,11 @@ typedef struct {
 uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued
 uint32_t BacoEntryDelay; // in milliseconds.
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 038a8956de5b..72b553618116 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -30,8 +30,8 @@
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2B
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index b71860e5324a..fa520d79ef67 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -886,6 +886,7 @@ static void sienna_cichlid_stb_init(struct smu_context *smu);
 
 static int sienna_cichlid_init_smc_tables(struct smu_context *smu)
 {
+	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
 	ret = sienna_cichlid_tables_init(smu);
@@ -896,7 +897,8 @@ static int sienna_cichlid_init_smc_tables(struct smu_context *smu)
 	if (ret)
 		return ret;
 
-	sienna_cichlid_stb_init(smu);
+	if (!amdgpu_sriov_vf(adev))
+		sienna_cichlid_stb_init(smu);
 
 	return smu_v11_0_init_smc_tables(smu);
 }
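The sienna_cichlid change above skips sienna_cichlid_stb_init() when the driver runs as an SR-IOV virtual function: the SMU trace buffer belongs to the host, so a VF must not map or program it. A standalone sketch of the gating pattern (stand-in names; the driver's real test is amdgpu_sriov_vf(adev)):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's device state and VF check. */
struct dev_ctx { bool sriov_vf; };

static void stb_init(struct dev_ctx *d) { puts("STB mapped"); }

static int init_smc_tables(struct dev_ctx *d)
{
	/* Host-owned hardware: a virtual function must not program it. */
	if (!d->sriov_vf)
		stb_init(d);
	return 0;
}

int main(void)
{
	struct dev_ctx vf = { .sriov_vf = true };
	return init_smc_tables(&vf);	/* skips stb_init under SR-IOV */
}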
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index ce2fa04e3926..931c775fe27e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -196,6 +196,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
 	TAB_MAP(DRIVER_SMU_CONFIG),
 	TAB_MAP(ACTIVITY_MONITOR_COEFF),
 	[SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE},
+	TAB_MAP(I2C_COMMANDS),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = {
@@ -310,6 +311,8 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu,
 
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VR0HOT_BIT);
 
+	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT);
+
 	return 0;
 }
 
@@ -1606,9 +1609,182 @@ static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 	return true;
 }
 
+static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap,
+				struct i2c_msg *msg, int num_msgs)
+{
+	struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap);
+	struct amdgpu_device *adev = smu_i2c->adev;
+	struct smu_context *smu = adev->powerplay.pp_handle;
+	struct smu_table_context *smu_table = &smu->smu_table;
+	struct smu_table *table = &smu_table->driver_table;
+	SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr;
+	int i, j, r, c;
+	u16 dir;
+
+	if (!adev->pm.dpm_enabled)
+		return -EBUSY;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->I2CcontrollerPort = smu_i2c->port;
+	req->I2CSpeed = I2C_SPEED_FAST_400K;
+	req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */
+	dir = msg[0].flags & I2C_M_RD;
+
+	for (c = i = 0; i < num_msgs; i++) {
+		for (j = 0; j < msg[i].len; j++, c++) {
+			SwI2cCmd_t *cmd = &req->SwI2cCmds[c];
+
+			if (!(msg[i].flags & I2C_M_RD)) {
+				/* write */
+				cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK;
+				cmd->ReadWriteData = msg[i].buf[j];
+			}
+
+			if ((dir ^ msg[i].flags) & I2C_M_RD) {
+				/* The direction changes.
+				 */
+				dir = msg[i].flags & I2C_M_RD;
+				cmd->CmdConfig |= CMDCONFIG_RESTART_MASK;
+			}
+
+			req->NumCmds++;
+
+			/*
+			 * Insert STOP if we are at the last byte of either last
+			 * message for the transaction or the client explicitly
+			 * requires a STOP at this particular message.
+			 */
+			if ((j == msg[i].len - 1) &&
+			    ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) {
+				cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK;
+				cmd->CmdConfig |= CMDCONFIG_STOP_MASK;
+			}
+		}
+	}
+	mutex_lock(&adev->pm.mutex);
+	r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
+	mutex_unlock(&adev->pm.mutex);
+	if (r)
+		goto fail;
+
+	for (c = i = 0; i < num_msgs; i++) {
+		if (!(msg[i].flags & I2C_M_RD)) {
+			c += msg[i].len;
+			continue;
+		}
+		for (j = 0; j < msg[i].len; j++, c++) {
+			SwI2cCmd_t *cmd = &res->SwI2cCmds[c];
+
+			msg[i].buf[j] = cmd->ReadWriteData;
+		}
+	}
+	r = num_msgs;
+fail:
+	kfree(req);
+	return r;
+}
+
+static u32 smu_v13_0_0_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm smu_v13_0_0_i2c_algo = {
+	.master_xfer = smu_v13_0_0_i2c_xfer,
+	.functionality = smu_v13_0_0_i2c_func,
+};
+
+static const struct i2c_adapter_quirks smu_v13_0_0_i2c_control_quirks = {
+	.flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN,
+	.max_read_len  = MAX_SW_I2C_COMMANDS,
+	.max_write_len = MAX_SW_I2C_COMMANDS,
+	.max_comb_1st_msg_len = 2,
+	.max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2,
+};
+
+static int smu_v13_0_0_i2c_control_init(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	int res, i;
+
+	for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
+		struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
+		struct i2c_adapter *control = &smu_i2c->adapter;
+
+		smu_i2c->adev = adev;
+		smu_i2c->port = i;
+		mutex_init(&smu_i2c->mutex);
+		control->owner = THIS_MODULE;
+		control->class = I2C_CLASS_SPD;
+		control->dev.parent = &adev->pdev->dev;
+		control->algo = &smu_v13_0_0_i2c_algo;
+		snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
+		control->quirks = &smu_v13_0_0_i2c_control_quirks;
+		i2c_set_adapdata(control, smu_i2c);
+
+		res = i2c_add_adapter(control);
+		if (res) {
+			DRM_ERROR("Failed to register hw i2c, err: %d\n", res);
+			goto Out_err;
+		}
+	}
+
+	/* assign the buses used for the FRU EEPROM and RAS EEPROM */
+	/* XXX ideally this would be something in a vbios data table */
+	adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter;
+	adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter;
+
+	return 0;
+Out_err:
+	for ( ; i >= 0; i--) {
+		struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
+		struct i2c_adapter *control = &smu_i2c->adapter;
+
+		i2c_del_adapter(control);
+	}
+	return res;
+}
+
+static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	int i;
+
+	for (i = 0; i < MAX_SMU_I2C_BUSES; i++) {
+		struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i];
+		struct i2c_adapter *control = &smu_i2c->adapter;
+
+		i2c_del_adapter(control);
+	}
+	adev->pm.ras_eeprom_i2c_bus = NULL;
+	adev->pm.fru_eeprom_i2c_bus = NULL;
+}
+
+static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
+				     enum pp_mp1_state mp1_state)
+{
+	int ret;
+
+	switch (mp1_state) {
+	case PP_MP1_STATE_UNLOAD:
+		ret = smu_cmn_set_mp1_state(smu, mp1_state);
+		break;
+	default:
+		/* Ignore others */
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
+	.i2c_init = smu_v13_0_0_i2c_control_init,
+	.i2c_fini = smu_v13_0_0_i2c_control_fini,
 	.is_dpm_running = smu_v13_0_0_is_dpm_running,
 	.dump_pptable = smu_v13_0_0_dump_pptable,
 	.init_microcode = smu_v13_0_init_microcode,
@@ -1670,7 +1846,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.baco_exit = smu_v13_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_mode1_reset,
-	.set_mp1_state = smu_cmn_set_mp1_state,
+	.set_mp1_state = smu_v13_0_0_set_mp1_state,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
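With the hunk above, SMU 13.0.0 gains the same SMU-routed software I2C the older ASICs have: smu_v13_0_0_i2c_xfer() packs all message bytes into one SwI2cRequest_t, sends it through the driver table, and copies read data back out, while the adapter quirks constrain callers to one write-then-read pair that fits within MAX_SW_I2C_COMMANDS. A kernel-side usage sketch of a consumer such as the EEPROM code (hypothetical caller, not code from this series):

#include <linux/errno.h>
#include <linux/i2c.h>

/* Hypothetical consumer: once i2c_control_init() has registered the
 * adapters, RAS/FRU EEPROM code talks to them with plain i2c_transfer().
 * The quirks above force this exact shape: a 2-byte offset write combined
 * with a read of at most MAX_SW_I2C_COMMANDS - 2 bytes.
 */
static int eeprom_read(struct i2c_adapter *adap, u16 slave,
		       u16 offset, u8 *buf, u16 len)
{
	u8 addr[2] = { offset >> 8, offset & 0xff };
	struct i2c_msg msgs[] = {
		{ .addr = slave, .flags = 0,        .len = 2,   .buf = addr },
		{ .addr = slave, .flags = I2C_M_RD, .len = len, .buf = buf  },
	};

	/* i2c_transfer() returns the number of messages processed. */
	return i2c_transfer(adap, msgs, 2) == 2 ? 0 : -EIO;
}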
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 6259a85bc818..9dd56e73218b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(DramLogSetDramSize,	PPSMC_MSG_DramLogSetDramSize,   0),
 	MSG_MAP(AllowGfxOff,		PPSMC_MSG_AllowGfxOff,          0),
 	MSG_MAP(DisallowGfxOff,		PPSMC_MSG_DisallowGfxOff,       0),
+	MSG_MAP(Mode1Reset,		PPSMC_MSG_Mode1Reset,           0),
 	MSG_MAP(PrepareMp1ForUnload,	PPSMC_MSG_PrepareMp1ForUnload,  0),
 };
 
@@ -250,6 +251,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu,
 	if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) {
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT);
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT);
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT);
 	}
 
 	if (adev->pm.pp_feature & PP_GFXOFF_MASK)
@@ -270,6 +272,9 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu,
 	if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);
 
+	if (adev->pm.pp_feature & PP_ULV_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT);
+
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT);
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MM_DPM_BIT);
@@ -1545,6 +1550,23 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp
 	return ret;
 }
 
+static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
+				     enum pp_mp1_state mp1_state)
+{
+	int ret;
+
+	switch (mp1_state) {
+	case PP_MP1_STATE_UNLOAD:
+		ret = smu_cmn_set_mp1_state(smu, mp1_state);
+		break;
+	default:
+		/* Ignore others */
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1602,7 +1624,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.baco_set_state = smu_v13_0_baco_set_state,
 	.baco_enter = smu_v13_0_baco_enter,
 	.baco_exit = smu_v13_0_baco_exit,
-	.set_mp1_state = smu_cmn_set_mp1_state,
+	.set_mp1_state = smu_v13_0_7_set_mp1_state,
};
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
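Both new set_mp1_state wrappers pair with the smu_disable_dpms() hunk earlier in this diff: PP_MP1_STATE_UNLOAD is no longer sent from the DPM-disable path, and MP1 is told to prepare for unload only when the core explicitly requests that state; every other state is accepted as a no-op instead of being rejected. A simplified kernel-side sketch of the unload path (hypothetical wrapper; the real entry point is amdgpu_dpm_set_mp1_state()):

/* Sketch only: on device teardown the core asks MP1 to prepare for
 * driver unload. With the wrappers above, 13.0.0/13.0.7 forward only
 * this state to the PMFW and quietly ignore RESET/SHUTDOWN/NONE.
 */
static void teardown_power(struct smu_context *smu)
{
	/* dispatched to smu_v13_0_7_set_mp1_state() via pptable_funcs */
	smu->ppt_funcs->set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
}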
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 84843b3b3aef..261fcbae88d7 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -693,7 +693,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* !! DONT REMOVE !!
-	 * We don't support vm_id yet, to be sure we don't have have broken
+	 * We don't support vm_id yet, to be sure we don't have broken
 	 * userspace, reject anyone trying to use non 0 value thus moving
 	 * forward we can use those fields without breaking existant userspace
 	 */
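The comment whose typo is fixed above states a general UAPI rule: an ioctl field that has no meaning yet must be rejected unless zero, so a future kernel can give it meaning without old userspace accidentally opting in. A standalone sketch of that reserved-field check (hypothetical struct, not radeon's actual args):

#include <errno.h>
#include <stdint.h>

/* Hypothetical mirror of a UAPI args struct with a reserved field. */
struct gem_va_args {
	uint32_t vm_id;		/* reserved: must be zero today */
	uint32_t flags;
};

static int check_reserved(const struct gem_va_args *args)
{
	if (args->vm_id != 0)
		return -EINVAL;	/* reject so the field stays assignable later */
	return 0;
}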