From 98e866c66953875a170cfff79a3c2f3c79460ed2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 15 Aug 2020 04:39:30 -0400 Subject: [PATCH] radeonsi: optimize out the loop in si_get_ps_input_cntl Use a remap table from a semantic to an index instead of searching for the correct index. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.h | 1 + src/gallium/drivers/radeonsi/si_shader_nir.c | 4 ++ .../drivers/radeonsi/si_state_shaders.c | 65 +++++++++---------- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 7ad282a651f..8694bc58ab7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -327,6 +327,7 @@ struct si_shader_info { ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS]; + char output_semantic_to_slot[VARYING_SLOT_TESS_MAX]; ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 1cf3f591248..a2f3af831ad 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -143,12 +143,14 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr } else { /* Outputs. */ assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); + assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot)); for (unsigned i = 0; i < num_slots; i++) { unsigned loc = driver_location + i; unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf; info->output_semantic[loc] = semantic + i; + info->output_semantic_to_slot[semantic + i] = loc; if (is_output_load) { /* Output loads have only a few things that we need to track. */ @@ -556,6 +558,8 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir); } + memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot)); + func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block (block, func->impl) { nir_foreach_instr (instr, block) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3e72e1fc37c..fb6ab7e0ee2 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3171,7 +3171,7 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader * unsigned semantic, enum glsl_interp_mode interpolate) { struct si_shader_info *vsinfo = &vs->selector->info; - unsigned j, offset, ps_input_cntl = 0; + unsigned offset, ps_input_cntl = 0; if (interpolate == INTERP_MODE_FLAT || (interpolate == INTERP_MODE_COLOR && sctx->flatshade) || @@ -3184,43 +3184,42 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader * ps_input_cntl |= S_028644_PT_SPRITE_TEX(1); } - /* TODO: This search can be removed if we add a lookup table from semantic to index. */ - for (j = 0; j < vsinfo->num_outputs; j++) { - if (semantic == vsinfo->output_semantic[j]) { - offset = vs->info.vs_output_param_offset[j]; - - if (offset <= AC_EXP_PARAM_OFFSET_31) { - /* The input is loaded from parameter memory. */ - ps_input_cntl |= S_028644_OFFSET(offset); - } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { - if (offset == AC_EXP_PARAM_UNDEFINED) { - /* This can happen with depth-only rendering. */ - offset = 0; - } else { - /* The input is a DEFAULT_VAL constant. */ - assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && - offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); - offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; - } + int vs_slot = vsinfo->output_semantic_to_slot[semantic]; + if (vs_slot >= 0) { + offset = vs->info.vs_output_param_offset[vs_slot]; - ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); + if (offset <= AC_EXP_PARAM_OFFSET_31) { + /* The input is loaded from parameter memory. */ + ps_input_cntl |= S_028644_OFFSET(offset); + } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { + if (offset == AC_EXP_PARAM_UNDEFINED) { + /* This can happen with depth-only rendering. */ + offset = 0; + } else { + /* The input is a DEFAULT_VAL constant. */ + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; } - break; + + ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); + } + } else { + /* VS output not found. */ + if (semantic == VARYING_SLOT_PRIMITIVE_ID) { + /* PrimID is written after the last output when HW VS is used. */ + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); + } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { + /* No corresponding output found, load defaults into input. + * Don't set any other bits. + * (FLAT_SHADE=1 completely changes behavior) */ + ps_input_cntl = S_028644_OFFSET(0x20); + /* D3D 9 behaviour. GL is undefined */ + if (semantic == VARYING_SLOT_COL0) + ps_input_cntl |= S_028644_DEFAULT_VAL(3); } } - if (j == vsinfo->num_outputs && semantic == VARYING_SLOT_PRIMITIVE_ID) - /* PrimID is written after the last output when HW VS is used. */ - ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); - else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { - /* No corresponding output found, load defaults into input. - * Don't set any other bits. - * (FLAT_SHADE=1 completely changes behavior) */ - ps_input_cntl = S_028644_OFFSET(0x20); - /* D3D 9 behaviour. GL is undefined */ - if (semantic == VARYING_SLOT_COL0) - ps_input_cntl |= S_028644_DEFAULT_VAL(3); - } return ps_input_cntl; } -- 2.30.2