From 92e9c14a6a8d536404ef5b41217662bb2286d946 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 28 Mar 2017 11:43:48 +1000 Subject: [PATCH] radv: move calculating fragment shader i/os to pipeline. There is no need to calculate this on each command submit. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 67 ++---------------------------- src/amd/vulkan/radv_pipeline.c | 71 ++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 2 + 3 files changed, 77 insertions(+), 63 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c3b141ea3a6..92e68efa861 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -669,18 +669,13 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { struct radeon_winsys *ws = cmd_buffer->device->ws; - struct radv_shader_variant *ps, *vs; + struct radv_shader_variant *ps; uint64_t va; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); struct radv_blend_state *blend = &pipeline->graphics.blend; - unsigned ps_offset = 0; - struct ac_vs_output_info *outinfo; assert (pipeline->shaders[MESA_SHADER_FRAGMENT]); ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; - vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX]; - - outinfo = &vs->info.vs.outinfo; va = ws->buffer_get_va(ps->bo); ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8); @@ -716,63 +711,9 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); radeon_set_context_reg(cmd_buffer->cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); - if (ps->info.fs.has_pcoord) { - unsigned val; - val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ps_offset++; - } - - if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->prim_id_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } - - if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) { - unsigned vs_offset, flat_shade; - unsigned val; - vs_offset = outinfo->layer_output; - flat_shade = true; - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } - - for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { - unsigned vs_offset, flat_shade; - unsigned val; - - if (!(ps->info.fs.input_mask & (1u << i))) - continue; - - - if (!(outinfo->export_mask & (1u << i))) { - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, - S_028644_OFFSET(0x20)); - ++ps_offset; - continue; - } - - vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1)); - if (outinfo->prim_id_output != 0xffffffff) { - if (vs_offset >= outinfo->prim_id_output) - vs_offset++; - } - if (outinfo->layer_output != 0xffffffff) { - if (vs_offset >= outinfo->layer_output) - vs_offset++; - } - flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); - - val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); - radeon_set_context_reg(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0 + 4 * ps_offset, val); - ++ps_offset; - } + radeon_set_context_reg_seq(cmd_buffer->cs, R_028644_SPI_PS_INPUT_CNTL_0, pipeline->graphics.ps_input_cntl_num); + for (unsigned i = 0; i < pipeline->graphics.ps_input_cntl_num; i++) + radeon_emit(cmd_buffer->cs, pipeline->graphics.ps_input_cntl[i]); } static void diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 550b773e9a5..c7d74805a27 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1527,6 +1527,76 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs) S_028A40_GS_WRITE_OPTIMIZE(1); } +static void calculate_ps_inputs(struct radv_pipeline *pipeline) +{ + struct radv_shader_variant *ps, *vs; + struct ac_vs_output_info *outinfo; + + ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; + vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : pipeline->shaders[MESA_SHADER_VERTEX]; + + outinfo = &vs->info.vs.outinfo; + + unsigned ps_offset = 0; + if (ps->info.fs.has_pcoord) { + unsigned val; + val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); + pipeline->graphics.ps_input_cntl[ps_offset] = val; + ps_offset++; + } + + if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) { + unsigned vs_offset, flat_shade; + unsigned val; + vs_offset = outinfo->prim_id_output; + flat_shade = true; + val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); + pipeline->graphics.ps_input_cntl[ps_offset] = val; + ++ps_offset; + } + + if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) { + unsigned vs_offset, flat_shade; + unsigned val; + vs_offset = outinfo->layer_output; + flat_shade = true; + val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); + pipeline->graphics.ps_input_cntl[ps_offset] = val; + ++ps_offset; + } + + for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) { + unsigned vs_offset, flat_shade; + unsigned val; + + if (!(ps->info.fs.input_mask & (1u << i))) + continue; + + if (!(outinfo->export_mask & (1u << i))) { + pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20); + ++ps_offset; + continue; + } + + vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1)); + if (outinfo->prim_id_output != 0xffffffff) { + if (vs_offset >= outinfo->prim_id_output) + vs_offset++; + } + if (outinfo->layer_output != 0xffffffff) { + if (vs_offset >= outinfo->layer_output) + vs_offset++; + } + flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset)); + + val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade); + pipeline->graphics.ps_input_cntl[ps_offset] = val; + ++ps_offset; + } + + pipeline->graphics.ps_input_cntl_num = ps_offset; +} + VkResult radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device, @@ -1672,6 +1742,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline, ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R : V_028710_SPI_SHADER_ZERO; + calculate_ps_inputs(pipeline); const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index dff0aef8328..bf3d19ce9e6 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -963,6 +963,8 @@ struct radv_pipeline { bool prim_restart_enable; unsigned esgs_ring_size; unsigned gsvs_ring_size; + uint32_t ps_input_cntl[32]; + uint32_t ps_input_cntl_num; struct radv_prim_vertex_count prim_vertex_count; } graphics; }; -- 2.30.2