From dc698fb5dc3d4043d0c7908b5703b92ad7056e86 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 8 Jun 2020 18:16:13 +0200 Subject: [PATCH] radv: add support for Sienna Cichlid Bits copied from RadeonSI. Totally untested. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_device.c | 5 +++- src/amd/vulkan/radv_image.c | 26 +++++++++++++++---- src/amd/vulkan/radv_pipeline.c | 12 +++++++-- src/amd/vulkan/si_cmd_buffer.c | 4 +++ src/amd/vulkan/winsys/null/radv_null_winsys.c | 8 ++++-- 5 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index f9afd722dd6..07d66892bc8 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -3679,7 +3679,10 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff } *max_offchip_buffers_p = max_offchip_buffers; - if (device->physical_device->rad_info.chip_class >= GFX7) { + if (device->physical_device->rad_info.chip_class >= GFX10_3) { + hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) | + S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity); + } else if (device->physical_device->rad_info.chip_class >= GFX7) { if (device->physical_device->rad_info.chip_class >= GFX8) --max_offchip_buffers; hs_offchip_param = diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 8c65eb6bc37..1cbe60adbdc 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -833,11 +833,27 @@ gfx10_make_texture_descriptor(struct radv_device *device, last_level) | S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) | S_00A00C_TYPE(type); - /* Depth is the the last accessible layer on gfx9+. The hw doesn't need - * to know the total number of layers. - */ - state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? 
depth - 1 : last_layer) | - S_00A010_BASE_ARRAY(first_layer); + + if (type == V_008F1C_SQ_RSRC_IMG_1D || + type == V_008F1C_SQ_RSRC_IMG_2D || + type == V_008F1C_SQ_RSRC_IMG_2D_MSAA) { + /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader + * resources starting with gfx10.3 (ignored if pitch <= + * width). Other texture targets can't. CB and DB can't set a + * custom pitch for any target. + * */ + if (device->physical_device->rad_info.chip_class >= GFX10_3) + state[4] = S_00A010_DEPTH(image->planes[0].surface.u.gfx9.surf_pitch - 1); + else + state[4] = 0; + } else { + /* Depth is the last accessible layer on gfx9+. The hw doesn't need + * to know the total number of layers. + */ + state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) | + S_00A010_BASE_ARRAY(first_layer); + } + state[5] = S_00A014_ARRAY_PITCH(0) | S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples) : diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index fcc23aacd37..312611adc07 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1170,7 +1170,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline, S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) | S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) | - S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* CM_R_028BE0_PA_SC_AA_CONFIG */ + S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */ + S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3); ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); if (ps_iter_samples > 1) pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); @@ -3589,6 +3590,9 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, /* from amdvlk: For 4xAA and 8xAA need to decompress on 
flush for better performance */ db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2); + + if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) + db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE_GFX103(2); } if (has_stencil_attachment && vkds && vkds->stencilTestEnable) { @@ -3884,6 +3888,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | + S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) | cull_dist_mask << 8 | clip_dist_mask); @@ -4003,6 +4008,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | + S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3) | cull_dist_mask << 8 | clip_dist_mask); @@ -4041,7 +4047,9 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, */ radeon_set_context_reg(ctx_cs, R_028838_PA_CL_NGG_CNTL, S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) && - !radv_pipeline_has_gs(pipeline))); + !radv_pipeline_has_gs(pipeline)) | + /* Reuse for NGG. */ + S_028838_VERTEX_REUSE_DEPTH_GFX103(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 
30 : 0)); ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) | S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */ diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index bef579f3ad2..d53aaf33144 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -418,6 +418,10 @@ si_emit_graphics(struct radv_device *device, S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); + if (physical_device->rad_info.chip_class >= GFX10_3) { + radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff); + } + if (physical_device->rad_info.chip_class == GFX10) { /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c index cb9a10d51f9..462ed8bfd1e 100644 --- a/src/amd/vulkan/winsys/null/radv_null_winsys.c +++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c @@ -80,7 +80,9 @@ static void radv_null_winsys_query_info(struct radeon_winsys *rws, info->family = i; info->name = "OVERRIDDEN"; - if (i >= CHIP_NAVI10) + if (i >= CHIP_SIENNA) + info->chip_class = GFX10_3; + else if (i >= CHIP_NAVI10) info->chip_class = GFX10; else if (i >= CHIP_VEGA10) info->chip_class = GFX9; @@ -101,7 +103,9 @@ static void radv_null_winsys_query_info(struct radeon_winsys *rws, info->pci_id = gpu_info[info->family].pci_id; info->has_syncobj_wait_for_submit = true; info->max_se = 4; - if (info->chip_class >= GFX10) + if (info->chip_class >= GFX10_3) + info->max_wave64_per_simd = 16; + else if (info->chip_class >= GFX10) info->max_wave64_per_simd = 20; else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM) info->max_wave64_per_simd = 8; -- 2.30.2