v3d/tex: don't configure tmu config 1 if not needed
authorAlejandro Piñeiro <apinheiro@igalia.com>
Mon, 13 Apr 2020 09:45:27 +0000 (11:45 +0200)
committerAlejandro Piñeiro <apinheiro@igalia.com>
Wed, 22 Apr 2020 21:38:18 +0000 (23:38 +0200)
TMU configuration parameter 1 configures the sampler for the texture
operation. But there are some texture operations that don't need a
sampler. Skipping the configuration could provide a small perf
improvement on OpenGL. On the upcoming Vulkan driver, it would allow us
to avoid setting up an unneeded sampler.

Note that we still need to add the sampler configuration parameter if
the output is 32-bit, because the sampler configuration is where that
setting is specified.

Also, note that for images this is done by comparing against an unpacked
p1 default. But doing that requires going through the code that fills
in the unpacked p1. Here we can skip that too.

Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4677>

src/broadcom/compiler/v3d40_tex.c

index 1ba0ee7dccc21dd03963387b5ea94165ce194bd8..1e40e63f4ea5b41adea16572695859ffa7c56068 100644 (file)
@@ -58,6 +58,29 @@ static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
         .op = V3D_TMU_OP_REGULAR,
 };
 
         .op = V3D_TMU_OP_REGULAR,
 };
 
+/*
+ * Returns whether the texture operation requires a sampler as a
+ * general rule; see the documentation of
+ * nir_tex_instr::sampler_index. The result is false for the ops
+ * listed in the switch below and true for every other op.
+ *
+ * Note that a false result is only the general rule: the specific hw
+ * may still require a sampler for some other reason, so callers need
+ * to check for that separately.
+ *
+ * NOTE(review): nir_texop_lod is treated as sampler-less here --
+ * confirm this still matches the nir_tex_instr::sampler_index
+ * documentation.
+ */
+static bool
+texture_instr_need_sampler(nir_tex_instr *instr)
+{
+        switch(instr->op) {
+        case nir_texop_txf:
+        case nir_texop_txf_ms:
+        case nir_texop_txs:
+        case nir_texop_lod:
+        case nir_texop_query_levels:
+        case nir_texop_texture_samples:
+        case nir_texop_samples_identical:
+                return false;
+        default:
+                return true;
+        }
+}
+
 void
 v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
 {
 void
 v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
 {
@@ -67,14 +90,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
         };
 
         struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
         };
 
-        struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
-                .output_type_32_bit = (c->key->tex[unit].return_size == 32 &&
-                                       !instr->is_shadow),
-
-                .unnormalized_coordinates = (instr->sampler_dim ==
-                                             GLSL_SAMPLER_DIM_RECT),
-        };
-
         struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
                 .op = V3D_TMU_OP_REGULAR,
 
         struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
                 .op = V3D_TMU_OP_REGULAR,
 
@@ -174,14 +189,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         p0_unpacked.return_words_of_texture_data =
                 nir_ssa_def_components_read(&instr->dest.ssa);
 
         p0_unpacked.return_words_of_texture_data =
                 nir_ssa_def_components_read(&instr->dest.ssa);
 
-        /* Word enables can't ask for more channels than the output type could
-         * provide (2 for f16, 4 for 32-bit).
-         */
-        assert(!p1_unpacked.output_type_32_bit ||
-               p0_unpacked.return_words_of_texture_data < (1 << 4));
-        assert(p1_unpacked.output_type_32_bit ||
-               p0_unpacked.return_words_of_texture_data < (1 << 2));
-
         assert(p0_unpacked.return_words_of_texture_data != 0);
 
         uint32_t p0_packed;
         assert(p0_unpacked.return_words_of_texture_data != 0);
 
         uint32_t p0_packed;
@@ -189,26 +196,58 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                                           (uint8_t *)&p0_packed,
                                           &p0_unpacked);
 
                                           (uint8_t *)&p0_packed,
                                           &p0_unpacked);
 
-        uint32_t p1_packed;
-        V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
-                                          (uint8_t *)&p1_packed,
-                                          &p1_unpacked);
-
         uint32_t p2_packed;
         V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
                                           (uint8_t *)&p2_packed,
                                           &p2_unpacked);
 
         uint32_t p2_packed;
         V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
                                           (uint8_t *)&p2_packed,
                                           &p2_unpacked);
 
-        /* Load unit number into the high bits of the texture or sampler
-         * address field, which will be be used by the driver to decide which
-         * texture to put in the actual address field.
+        /* Load unit number into the high bits of the texture address field,
+         * which will be used by the driver to decide which texture to put
+         * in the actual address field.
          */
         p0_packed |= unit << 24;
          */
         p0_packed |= unit << 24;
-        p1_packed |= unit << 24;
 
         vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P0, p0_packed);
 
         vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P0, p0_packed);
-        /* XXX perf: Can we skip p1 setup for txf ops? */
-        vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed);
+
+        /* Even if the texture operation doesn't need a sampler by
+         * itself, we still need to add the sampler configuration
+         * parameter if the output is 32 bit
+         */
+        bool output_type_32_bit = (c->key->tex[unit].return_size == 32 &&
+                                   !instr->is_shadow);
+
+        if (output_type_32_bit || texture_instr_need_sampler(instr)) {
+                struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
+                        .output_type_32_bit = output_type_32_bit,
+
+                        .unnormalized_coordinates = (instr->sampler_dim ==
+                                                     GLSL_SAMPLER_DIM_RECT),
+                };
+
+                /* Word enables can't ask for more channels than the
+                 * output type could provide (2 for f16, 4 for
+                 * 32-bit).
+                 */
+                assert(!p1_unpacked.output_type_32_bit ||
+                       p0_unpacked.return_words_of_texture_data < (1 << 4));
+                assert(p1_unpacked.output_type_32_bit ||
+                       p0_unpacked.return_words_of_texture_data < (1 << 2));
+
+                uint32_t p1_packed;
+                V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
+                                                  (uint8_t *)&p1_packed,
+                                                  &p1_unpacked);
+
+                /* Load unit number into the high bits of the sampler
+                 * address field, which will be used by the driver
+                 * to decide which sampler to put in the actual
+                 * address field.
+                 */
+                p1_packed |= unit << 24;
+
+                vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed);
+        }
+
         if (memcmp(&p2_unpacked, &p2_unpacked_default, sizeof(p2_unpacked)) != 0)
                 vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);
 
         if (memcmp(&p2_unpacked, &p2_unpacked_default, sizeof(p2_unpacked)) != 0)
                 vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);