From: Alejandro Piñeiro Date: Mon, 13 Apr 2020 09:45:27 +0000 (+0200) Subject: v3d/tex: don't configure tmu config 1 if not needed X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d0b644d9f9d9673d3fe28c8c200209f553adeda1;p=mesa.git v3d/tex: don't configure tmu config 1 if not needed TMU configuration parameter 1 configures the sampler for the texture operation. But there are some texture operations that don't need a sampler. Skipping the configuration could provide a small perf improvement on OpenGL. On the incoming Vulkan driver, it would allow us to avoid setting up an unneeded sampler. Note that we still need to add the sampler configuration parameter if the output is 32-bit, as the sampler is where we configure that info. Also, note that for images this is done by comparing against an unpacked p1 default. But in order to do that, we would need to go through the code that fills in the unpacked p1. We can skip that too. Reviewed-by: Eric Anholt Part-of: --- diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c index 1ba0ee7dccc..1e40e63f4ea 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d40_tex.c @@ -58,6 +58,29 @@ static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = { .op = V3D_TMU_OP_REGULAR, }; +/* + * This method returns if the texture operation requires a sampler as + * a general rule, see the documentation of + * nir_tex_instr::sampler_index. Note that the specific hw would + * require a sampler in any case, for some other reason.
+ */ +static bool +texture_instr_need_sampler(nir_tex_instr *instr) +{ + switch(instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + return false; + default: + return true; + } +} + void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) { @@ -67,14 +90,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; - struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { - .output_type_32_bit = (c->key->tex[unit].return_size == 32 && - !instr->is_shadow), - - .unnormalized_coordinates = (instr->sampler_dim == - GLSL_SAMPLER_DIM_RECT), - }; - struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { .op = V3D_TMU_OP_REGULAR, @@ -174,14 +189,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) p0_unpacked.return_words_of_texture_data = nir_ssa_def_components_read(&instr->dest.ssa); - /* Word enables can't ask for more channels than the output type could - * provide (2 for f16, 4 for 32-bit). - */ - assert(!p1_unpacked.output_type_32_bit || - p0_unpacked.return_words_of_texture_data < (1 << 4)); - assert(p1_unpacked.output_type_32_bit || - p0_unpacked.return_words_of_texture_data < (1 << 2)); - assert(p0_unpacked.return_words_of_texture_data != 0); uint32_t p0_packed; @@ -189,26 +196,58 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) (uint8_t *)&p0_packed, &p0_unpacked); - uint32_t p1_packed; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, - (uint8_t *)&p1_packed, - &p1_unpacked); - uint32_t p2_packed; V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL, (uint8_t *)&p2_packed, &p2_unpacked); - /* Load unit number into the high bits of the texture or sampler - * address field, which will be be used by the driver to decide which - * texture to put in the actual address field. 
+ /* Load unit number into the high bits of the texture address field, + * which will be be used by the driver to decide which texture to put + * in the actual address field. */ p0_packed |= unit << 24; - p1_packed |= unit << 24; vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P0, p0_packed); - /* XXX perf: Can we skip p1 setup for txf ops? */ - vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed); + + /* Even if the texture operation doesn't need a sampler by + * itself, we still need to add the sampler configuration + * parameter if the output is 32 bit + */ + bool output_type_32_bit = (c->key->tex[unit].return_size == 32 && + !instr->is_shadow); + + if (output_type_32_bit || texture_instr_need_sampler(instr)) { + struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { + .output_type_32_bit = output_type_32_bit, + + .unnormalized_coordinates = (instr->sampler_dim == + GLSL_SAMPLER_DIM_RECT), + }; + + /* Word enables can't ask for more channels than the + * output type could provide (2 for f16, 4 for + * 32-bit). + */ + assert(!p1_unpacked.output_type_32_bit || + p0_unpacked.return_words_of_texture_data < (1 << 4)); + assert(p1_unpacked.output_type_32_bit || + p0_unpacked.return_words_of_texture_data < (1 << 2)); + + uint32_t p1_packed; + V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + (uint8_t *)&p1_packed, + &p1_unpacked); + + /* Load unit number into the high bits of the sampler + * address field, which will be be used by the driver + * to decide which sampler to put in the actual + * address field. + */ + p1_packed |= unit << 24; + + vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed); + } + if (memcmp(&p2_unpacked, &p2_unpacked_default, sizeof(p2_unpacked)) != 0) vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);