From 8c1c21890969ce0f6e0df28522f04cdcd7dd482f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Apr 2020 16:34:00 -0700 Subject: [PATCH] freedreno/ir3: Improve shader key normalization. We can remove a bunch of conditional code at key comparison time by computing a bitmask of used key bits at ir3_shader creation time. This also gives us a nice place to put additional key simplification to reduce how many variants we create (like skipping rasterflat if we don't read colors in the FS, or skipping vclamp_color if we don't write colors). It does mean walking the whole key to AND it, but the key is just 28 bytes so far, so that seems pretty fine. Part-of: --- src/compiler/shader_enums.h | 4 + src/freedreno/ir3/ir3_shader.c | 58 +++++++++++++++ src/freedreno/ir3/ir3_shader.h | 73 +++++-------------- .../drivers/freedreno/ir3/ir3_gallium.c | 8 +- 4 files changed, 83 insertions(+), 60 deletions(-) diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index 56062894a94..b33a91727a1 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -329,6 +329,10 @@ const char *gl_varying_slot_name(gl_varying_slot slot); #define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ) #define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0) #define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1) +#define VARYING_BITS_COLOR (VARYING_BIT_COL0 | \ + VARYING_BIT_COL1 | \ + VARYING_BIT_BFC0 | \ + VARYING_BIT_BFC1) #define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE) #define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX) #define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 676c90e0780..e8e95fce3ae 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -289,6 +289,62 @@ ir3_shader_destroy(struct ir3_shader *shader) free(shader); } +/** + * Creates a bitmask of the used bits of the shader key by this
particular + * shader. Used by the gallium driver to skip state-dependent recompiles when + * possible. + */ +static void +ir3_setup_used_key(struct ir3_shader *shader) +{ + nir_shader *nir = shader->nir; + struct shader_info *info = &nir->info; + struct ir3_shader_key *key = &shader->key_mask; + + /* This key flag is just used to make for a cheaper ir3_shader_key_equal + * check in the common case. + */ + key->has_per_samp = true; + + if (info->stage == MESA_SHADER_FRAGMENT) { + key->fsaturate_s = ~0; + key->fsaturate_t = ~0; + key->fsaturate_r = ~0; + key->fastc_srgb = ~0; + key->fsamples = ~0; + + if (info->inputs_read & VARYING_BITS_COLOR) { + key->rasterflat = true; + key->color_two_side = true; + } + + /* outputs_written is a bitmask, so convert the FRAG_RESULT_* slots to bits. */ + if (info->outputs_written & ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_STENCIL) | + BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))) + key->fclamp_color = true; + + /* Only used for deciding on behavior of + * nir_intrinsic_load_barycentric_sample + */ + key->msaa = info->fs.uses_sample_qualifier; + } else { + key->tessellation = ~0; + key->has_gs = true; + + if (info->outputs_written & VARYING_BITS_COLOR) + key->vclamp_color = true; + + if (info->stage == MESA_SHADER_VERTEX) { + key->vsaturate_s = ~0; + key->vsaturate_t = ~0; + key->vsaturate_r = ~0; + key->vastc_srgb = ~0; + key->vsamples = ~0; + } + } +} + struct ir3_shader * ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, struct ir3_stream_output_info *stream_output) @@ -336,6 +392,8 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, nir_print_shader(shader->nir, stdout); } + ir3_setup_used_key(shader); + return shader; } diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 3b9b3f29204..ecb39481927 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -364,62 +364,6 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *las return false; } -/* clears shader-key flags which don't apply to the given
shader - * stage - */ -static inline void -ir3_normalize_key(struct ir3_shader_key *key, gl_shader_stage type) -{ - switch (type) { - case MESA_SHADER_FRAGMENT: - if (key->has_per_samp) { - key->vsaturate_s = 0; - key->vsaturate_t = 0; - key->vsaturate_r = 0; - key->vastc_srgb = 0; - key->vsamples = 0; - key->has_gs = false; /* FS doesn't care */ - key->tessellation = IR3_TESS_NONE; - } - break; - case MESA_SHADER_VERTEX: - case MESA_SHADER_GEOMETRY: - key->color_two_side = false; - key->rasterflat = false; - if (key->has_per_samp) { - key->fsaturate_s = 0; - key->fsaturate_t = 0; - key->fsaturate_r = 0; - key->fastc_srgb = 0; - key->fsamples = 0; - } - - /* VS and GS only care about whether or not we're tessellating. */ - key->tessellation = !!key->tessellation; - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - key->color_two_side = false; - key->rasterflat = false; - if (key->has_per_samp) { - key->fsaturate_s = 0; - key->fsaturate_t = 0; - key->fsaturate_r = 0; - key->fastc_srgb = 0; - key->fsamples = 0; - key->vsaturate_s = 0; - key->vsaturate_t = 0; - key->vsaturate_r = 0; - key->vastc_srgb = 0; - key->vsamples = 0; - } - break; - default: - /* TODO */ - break; - } -} - /** * On a4xx+a5xx, Images share state with textures and SSBOs: * @@ -674,6 +618,11 @@ struct ir3_shader { /* Map from driver_location to byte offset in per-primitive storage */ unsigned output_loc[32]; + + /* Bitmask of bits of the shader key used by this shader, filled in by + * ir3_setup_used_key(). Used to avoid recompiles for GL non-orthogonal + * state (NOS) that doesn't actually apply to the shader. */ + struct ir3_shader_key key_mask; }; void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id); @@ -692,6 +641,18 @@ ir3_glsl_type_size(const struct glsl_type *type, bool bindless); * Helper/util: */ +/* clears shader-key flags which don't apply to the given shader.
+ * Works by ANDing the key with shader->key_mask, one 32-bit word at a time. */ +static inline void +ir3_key_clear_unused(struct ir3_shader_key *key, struct ir3_shader *shader) +{ + uint32_t *key_bits = (uint32_t *)key; + const uint32_t *key_mask = (const uint32_t *)&shader->key_mask; + STATIC_ASSERT(sizeof(*key) % 4 == 0); + for (unsigned i = 0; i < sizeof(*key) / sizeof(uint32_t); i++) + key_bits[i] &= key_mask[i]; +} + static inline int ir3_find_output(const struct ir3_shader_variant *so, gl_varying_slot slot) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 6e31fa0195a..2f7a01603aa 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -77,11 +77,11 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key, struct ir3_shader_variant *v; bool created = false; - /* some shader key values only apply to vertex or frag shader, - * so normalize the key to avoid constructing multiple identical - * variants: + /* Some shader key values may not be used by a given ir3_shader (for + * example, fragment shader saturates in the vertex shader), so clean out + * those flags to avoid recompiling. */ - ir3_normalize_key(&key, shader->type); + ir3_key_clear_unused(&key, shader); v = ir3_shader_get_variant(shader, &key, binning_pass, &created); -- 2.30.2