freedreno/ir3: Improve shader key normalization.
author Eric Anholt <eric@anholt.net>
Tue, 14 Apr 2020 23:34:00 +0000 (16:34 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 1 May 2020 16:26:32 +0000 (16:26 +0000)
We can remove a bunch of conditional code at key comparison time by
computing a bitmask of used key bits at ir3_shader creation time.  This
also gives us a nice place to put additional key simplification to reduce
how many variants we create (like skipping rasterflat if we don't read
colors in the FS, or skipping vclamp_color if we don't write colors).

It does mean walking the whole key to AND it, but the key is just 28 bytes
so far so that seems pretty fine.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4562>

src/compiler/shader_enums.h
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/gallium/drivers/freedreno/ir3/ir3_gallium.c

index 56062894a947795c56beaaf3ca17ab28c784800d..b33a91727a157ea599686fb5e6efa084ddbd08bc 100644 (file)
@@ -329,6 +329,10 @@ const char *gl_varying_slot_name(gl_varying_slot slot);
 #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
 #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
 #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
 #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
 #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
 #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+#define VARYING_BITS_COLOR (VARYING_BIT_COL0 | \
+                            VARYING_BIT_COL1 |        \
+                            VARYING_BIT_BFC0 |        \
+                            VARYING_BIT_BFC1) /* union of the COL0, COL1, BFC0 and BFC1 varying bits */
 #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
 #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
 #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
 #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
 #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
 #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
index 676c90e078057daf2ef06bc1c5801ce6dc013ae0..e8e95fce3ae1c02d99626770bac676f40b9c39ee 100644 (file)
@@ -289,6 +289,62 @@ ir3_shader_destroy(struct ir3_shader *shader)
        free(shader);
 }
 
        free(shader);
 }
 
+/**
+ * Creates a bitmask of the used bits of the shader key by this particular
+ * shader.  Used by the gallium driver to skip state-dependent recompiles when
+ * possible.
+ *
+ * The result is stored in shader->key_mask: a key field is set to all-ones
+ * (or true) when a change in that field can affect this shader.  This
+ * function only ever sets fields; anything it does not touch keeps the value
+ * it had on entry (NOTE(review): presumably zero from a zeroing allocation of
+ * the ir3_shader -- confirm against the allocator).
+ *
+ * \param shader  shader whose nir->info is inspected and whose key_mask is
+ *                filled in.
+ */
+static void
+ir3_setup_used_key(struct ir3_shader *shader)
+{
+       nir_shader *nir = shader->nir;
+       struct shader_info *info = &nir->info;
+       struct ir3_shader_key *key = &shader->key_mask;
+
+       /* This key flag is just used to make for a cheaper ir3_shader_key_equal
+        * check in the common case.
+        */
+       key->has_per_samp = true;
+
+       if (info->stage == MESA_SHADER_FRAGMENT) {
+               /* Per-sampler fragment state always stays in the key. */
+               key->fsaturate_s = ~0;
+               key->fsaturate_t = ~0;
+               key->fsaturate_r = ~0;
+               key->fastc_srgb = ~0;
+               key->fsamples = ~0;
+
+               /* Flat shading and two-sided color only matter if the FS actually
+                * reads a color input.
+                */
+               if (info->inputs_read & VARYING_BITS_COLOR) {
+                       key->rasterflat = true;
+                       key->color_two_side = true;
+               }
+
+               /* Color clamping only matters if something other than depth,
+                * stencil or sample mask is written.  outputs_written is a bitmask,
+                * so the FRAG_RESULT_* slot numbers must be turned into bits before
+                * masking; ORing the raw enum values together would build a mask
+                * that does not correspond to those three slots.
+                */
+               if ((info->outputs_written & ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
+                                                               BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
+                                                               BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))) != 0) {
+                       key->fclamp_color = true;
+               }
+
+               /* Only used for deciding on behavior of
+                * nir_intrinsic_load_barycentric_sample
+                */
+               key->msaa = info->fs.uses_sample_qualifier;
+       } else {
+               /* Every non-fragment stage keeps the tessellation and GS bits. */
+               key->tessellation = ~0;
+               key->has_gs = true;
+
+               /* Vertex color clamping only matters if a color output is written. */
+               if (info->outputs_written & VARYING_BITS_COLOR)
+                       key->vclamp_color = true;
+
+               /* Per-sampler vertex state is only kept for the vertex stage itself. */
+               if (info->stage == MESA_SHADER_VERTEX) {
+                       key->vsaturate_s = ~0;
+                       key->vsaturate_t = ~0;
+                       key->vsaturate_r = ~0;
+                       key->vastc_srgb = ~0;
+                       key->vsamples = ~0;
+               }
+       }
+}
+
 struct ir3_shader *
 ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                struct ir3_stream_output_info *stream_output)
 struct ir3_shader *
 ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                struct ir3_stream_output_info *stream_output)
@@ -336,6 +392,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                nir_print_shader(shader->nir, stdout);
        }
 
                nir_print_shader(shader->nir, stdout);
        }
 
+       ir3_setup_used_key(shader);
+
        return shader;
 }
 
        return shader;
 }
 
index 3b9b3f29204c7c1827fa1d697c51e3243320006d..ecb3948192732ac0c26b9296a49fdb2d20ef6635 100644 (file)
@@ -364,62 +364,6 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *las
        return false;
 }
 
        return false;
 }
 
-/* Clears shader-key flags which don't apply to the given shader stage, so
- * that keys which differ only in state irrelevant to that stage end up
- * identical.
- *
- * key:  shader key, modified in place.
- * type: stage of the shader the key is going to be used with.
- *
- * The per-sampler fields (saturate, astc_srgb, samples) are only cleared
- * when key->has_per_samp is set.  Stages not listed in the switch leave
- * the key untouched.
- */
-static inline void
-ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type)
-{
-       switch (type) {
-       case MESA_SHADER_FRAGMENT:
-               if (key->has_per_samp) { /* drop the vertex-side per-sampler state */
-                       key->vsaturate_s = 0;
-                       key->vsaturate_t = 0;
-                       key->vsaturate_r = 0;
-                       key->vastc_srgb = 0;
-                       key->vsamples = 0;
-                       key->has_gs = false; /* FS doesn't care */
-                       key->tessellation = IR3_TESS_NONE;
-               }
-               break;
-       case MESA_SHADER_VERTEX:
-       case MESA_SHADER_GEOMETRY:
-               key->color_two_side = false;
-               key->rasterflat = false;
-               if (key->has_per_samp) { /* drop the fragment-side per-sampler state */
-                       key->fsaturate_s = 0;
-                       key->fsaturate_t = 0;
-                       key->fsaturate_r = 0;
-                       key->fastc_srgb = 0;
-                       key->fsamples = 0;
-               }
-
-               /* VS and GS only care about whether or not we're tessellating, so
-                * collapse the tessellation mode to 0 or 1.
-                */
-               key->tessellation = !!key->tessellation;
-               break;
-       case MESA_SHADER_TESS_CTRL:
-       case MESA_SHADER_TESS_EVAL:
-               key->color_two_side = false;
-               key->rasterflat = false;
-               if (key->has_per_samp) { /* drop both fragment- and vertex-side per-sampler state */
-                       key->fsaturate_s = 0;
-                       key->fsaturate_t = 0;
-                       key->fsaturate_r = 0;
-                       key->fastc_srgb = 0;
-                       key->fsamples = 0;
-                       key->vsaturate_s = 0;
-                       key->vsaturate_t = 0;
-                       key->vsaturate_r = 0;
-                       key->vastc_srgb = 0;
-                       key->vsamples = 0;
-               }
-               break;
-       default:
-               /* TODO: no normalization is done for any other stage yet */
-               break;
-       }
-}
-
 /**
  * On a4xx+a5xx, Images share state with textures and SSBOs:
  *
 /**
  * On a4xx+a5xx, Images share state with textures and SSBOs:
  *
@@ -674,6 +618,11 @@ struct ir3_shader {
 
        /* Map from driver_location to byte offset in per-primitive storage */
        unsigned output_loc[32];
 
        /* Map from driver_location to byte offset in per-primitive storage */
        unsigned output_loc[32];
+
+       /* Bitmask of bits of the shader key used by this shader.  Used to avoid
+        * recompiles for GL NOS that doesn't actually apply to the shader.
+        */
+       struct ir3_shader_key key_mask;
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
@@ -692,6 +641,18 @@ ir3_glsl_type_size(const struct glsl_type *type, bool bindless);
  * Helper/util:
  */
 
  * Helper/util:
  */
 
+/**
+ * Masks a shader key down to the bits the given shader actually uses.
+ *
+ * Every 32-bit word of *key is ANDed in place with the corresponding word of
+ * shader->key_mask, so any bit that is clear in the mask ends up clear in
+ * the key.
+ */
+static inline void
+ir3_key_clear_unused(struct ir3_shader_key *key, struct ir3_shader *shader)
+{
+       uint32_t *dst = (uint32_t *)key;
+       uint32_t *mask = (uint32_t *)&shader->key_mask;
+       uint32_t *end = dst + (sizeof(*key) >> 2);
+
+       /* The key is walked one dword at a time, so its size has to be a whole
+        * number of dwords.
+        */
+       STATIC_ASSERT(sizeof(*key) % 4 == 0);
+
+       while (dst != end)
+               *dst++ &= *mask++;
+}
+
 static inline int
 ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
 {
 static inline int
 ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot)
 {
index 6e31fa0195ac6b42d42381fb4e89f6c5cccc8c3b..2f7a01603aa329bbd588126a046079c381cf7e4b 100644 (file)
@@ -77,11 +77,11 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key,
        struct ir3_shader_variant *v;
        bool created = false;
 
        struct ir3_shader_variant *v;
        bool created = false;
 
-       /* some shader key values only apply to vertex or frag shader,
-        * so normalize the key to avoid constructing multiple identical
-        * variants:
+       /* Some shader key values may not be used by a given ir3_shader (for
+        * example, fragment shader saturates in the vertex shader), so clean out
+        * those flags to avoid recompiling.
         */
         */
-       ir3_normalize_key(&key, shader->type);
+       ir3_key_clear_unused(&key, shader);
 
        v = ir3_shader_get_variant(shader, &key, binning_pass, &created);
 
 
        v = ir3_shader_get_variant(shader, &key, binning_pass, &created);