v3d/tex: don't configure tmu config 1 if not needed
[mesa.git] / src / broadcom / compiler / v3d40_tex.c
index 1ba0ee7dccc21dd03963387b5ea94165ce194bd8..1e40e63f4ea5b41adea16572695859ffa7c56068 100644 (file)
@@ -58,6 +58,29 @@ static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
         .op = V3D_TMU_OP_REGULAR,
 };
 
+/*
+ * This method returns if the texture operation requires a sampler as
+ * a general rule, see the documentation of
+ * nir_tex_instr::sampler_index. Note that the specific hw would
+ * require a sampler in any case, for some other reason.
+ */
+static bool
+texture_instr_need_sampler(nir_tex_instr *instr)
+{
+        switch(instr->op) {
+        case nir_texop_txf:
+        case nir_texop_txf_ms:
+        case nir_texop_txs:
+        case nir_texop_lod:
+        case nir_texop_query_levels:
+        case nir_texop_texture_samples:
+        case nir_texop_samples_identical:
+                return false;
+        default:
+                return true;
+        }
+}
+
 void
 v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
 {
@@ -67,14 +90,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
         };
 
-        struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
-                .output_type_32_bit = (c->key->tex[unit].return_size == 32 &&
-                                       !instr->is_shadow),
-
-                .unnormalized_coordinates = (instr->sampler_dim ==
-                                             GLSL_SAMPLER_DIM_RECT),
-        };
-
         struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
                 .op = V3D_TMU_OP_REGULAR,
 
@@ -174,14 +189,6 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         p0_unpacked.return_words_of_texture_data =
                 nir_ssa_def_components_read(&instr->dest.ssa);
 
-        /* Word enables can't ask for more channels than the output type could
-         * provide (2 for f16, 4 for 32-bit).
-         */
-        assert(!p1_unpacked.output_type_32_bit ||
-               p0_unpacked.return_words_of_texture_data < (1 << 4));
-        assert(p1_unpacked.output_type_32_bit ||
-               p0_unpacked.return_words_of_texture_data < (1 << 2));
-
         assert(p0_unpacked.return_words_of_texture_data != 0);
 
         uint32_t p0_packed;
@@ -189,26 +196,58 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
                                           (uint8_t *)&p0_packed,
                                           &p0_unpacked);
 
-        uint32_t p1_packed;
-        V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
-                                          (uint8_t *)&p1_packed,
-                                          &p1_unpacked);
-
         uint32_t p2_packed;
         V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
                                           (uint8_t *)&p2_packed,
                                           &p2_unpacked);
 
-        /* Load unit number into the high bits of the texture or sampler
-         * address field, which will be be used by the driver to decide which
-         * texture to put in the actual address field.
+        /* Load unit number into the high bits of the texture address field,
+         * which will be be used by the driver to decide which texture to put
+         * in the actual address field.
          */
         p0_packed |= unit << 24;
-        p1_packed |= unit << 24;
 
         vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P0, p0_packed);
-        /* XXX perf: Can we skip p1 setup for txf ops? */
-        vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed);
+
+        /* Even if the texture operation doesn't need a sampler by
+         * itself, we still need to add the sampler configuration
+         * parameter if the output is 32 bit
+         */
+        bool output_type_32_bit = (c->key->tex[unit].return_size == 32 &&
+                                   !instr->is_shadow);
+
+        if (output_type_32_bit || texture_instr_need_sampler(instr)) {
+                struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
+                        .output_type_32_bit = output_type_32_bit,
+
+                        .unnormalized_coordinates = (instr->sampler_dim ==
+                                                     GLSL_SAMPLER_DIM_RECT),
+                };
+
+                /* Word enables can't ask for more channels than the
+                 * output type could provide (2 for f16, 4 for
+                 * 32-bit).
+                 */
+                assert(!p1_unpacked.output_type_32_bit ||
+                       p0_unpacked.return_words_of_texture_data < (1 << 4));
+                assert(p1_unpacked.output_type_32_bit ||
+                       p0_unpacked.return_words_of_texture_data < (1 << 2));
+
+                uint32_t p1_packed;
+                V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
+                                                  (uint8_t *)&p1_packed,
+                                                  &p1_unpacked);
+
+                /* Load unit number into the high bits of the sampler
+                 * address field, which will be be used by the driver
+                 * to decide which sampler to put in the actual
+                 * address field.
+                 */
+                p1_packed |= unit << 24;
+
+                vir_WRTMUC(c, QUNIFORM_TMU_CONFIG_P1, p1_packed);
+        }
+
         if (memcmp(&p2_unpacked, &p2_unpacked_default, sizeof(p2_unpacked)) != 0)
                 vir_WRTMUC(c, QUNIFORM_CONSTANT, p2_packed);