From 9edff0cfd4f567a9db5bc02be519e7d48299228a Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 24 Jun 2020 12:03:59 +0200 Subject: [PATCH] ir3: Support variants with different constlen's This provides the mechanism for compiling variants with a reduced constlen. The next patch provides the policy for choosing which to reduce. Part-of: --- src/freedreno/ir3/ir3_compiler.c | 35 ++++++++++++++++--- src/freedreno/ir3/ir3_compiler.h | 20 +++++++++-- src/freedreno/ir3/ir3_cp.c | 3 +- src/freedreno/ir3/ir3_nir.c | 2 +- .../ir3/ir3_nir_analyze_ubo_ranges.c | 2 +- src/freedreno/ir3/ir3_shader.c | 2 ++ src/freedreno/ir3/ir3_shader.h | 31 ++++++++++++++++ 7 files changed, 83 insertions(+), 12 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 7e74f9ba6da..0481187b867 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -65,6 +65,36 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id if (compiler->gpu_id >= 600) { compiler->mergedregs_set = ir3_ra_alloc_reg_set(compiler, true); compiler->samgq_workaround = true; + /* a6xx split the pipeline state into geometry and fragment state, in + * order to let the VS run ahead of the FS. As a result there are now + * separate const files for the fragment shader and everything + * else, and separate limits. There seems to be a shared limit, but + * it's higher than the vert or frag limits. + * + * TODO: The shared limit seems to be different on different + * models. + */ + compiler->max_const_pipeline = 640; + compiler->max_const_frag = 512; + compiler->max_const_geom = 512; + compiler->max_const_safe = 128; + + /* Compute shaders don't share a const file with the FS. Instead they + * have their own file, which is smaller than the FS one. + * + * TODO: is this true on earlier gen's? 
+ */ + compiler->max_const_compute = 256; + } else { + compiler->max_const_pipeline = 512; + compiler->max_const_geom = 512; + compiler->max_const_frag = 512; + compiler->max_const_compute = 512; + + /* Note: this will have to change if/when we support tess+GS on + * earlier gen's. + */ + compiler->max_const_safe = 256; } if (compiler->gpu_id >= 400) { @@ -74,10 +104,6 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id compiler->unminify_coords = false; compiler->txf_ms_with_isaml = false; compiler->array_index_add_half = true; - /* Some a6xxs can apparently do 640 consts, but not all. Need to - * characterize this better across GPUs - */ - compiler->max_const = 512; compiler->const_upload_unit = 4; } else { /* no special handling for "flat" */ @@ -86,7 +112,6 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id compiler->unminify_coords = true; compiler->txf_ms_with_isaml = true; compiler->array_index_add_half = false; - compiler->max_const = 512; compiler->const_upload_unit = 8; } diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 68813c66fdb..f16a8301e16 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -70,10 +70,24 @@ struct ir3_compiler { */ bool samgq_workaround; - /* on a3xx, the limit on const access is lower than later gens (in vec4 - * units): + /* The maximum number of constants, in vec4's, across the entire graphics + * pipeline. */ - uint32_t max_const; + uint16_t max_const_pipeline; + + /* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */ + uint16_t max_const_geom; + + /* The maximum number of constants, in vec4's, for FS. */ + uint16_t max_const_frag; + + /* A "safe" max constlen that can be applied to each shader in the + * pipeline which we guarantee will never exceed any combined limits. + */ + uint16_t max_const_safe; + + /* The maximum number of constants, in vec4's, for compute shaders. 
*/ + uint16_t max_const_compute; /* on a3xx, the unit of indirect const load is higher than later gens (in * vec4 units): diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index e739f792cf8..56f6b2b215d 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -220,12 +220,11 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, } if (i == const_state->immediate_idx) { - struct ir3_compiler *compiler = instr->block->shader->compiler; /* Add on a new immediate to be pushed, if we have space left in the * constbuf. */ if (const_state->offsets.immediate + const_state->immediate_idx / 4 >= - compiler->max_const) + ir3_max_const(ctx->so)) return false; swiz = i % 4; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 97b0d35d215..bf2db6b6c9c 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -552,5 +552,5 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, const_state->offsets.immediate = constoff; - assert(constoff <= compiler->max_const); + assert(constoff <= ir3_max_const(v)); } diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 2cccee24dc8..5e631e31d59 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -372,7 +372,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) */ struct ir3_const_state worst_case_const_state = { }; ir3_setup_const_state(nir, v, &worst_case_const_state); - const uint32_t max_upload = (compiler->max_const - + const uint32_t max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16; uint32_t offset = v->shader->num_reserved_user_consts * 16; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 5afb5992851..6c64f84e345 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -285,6 
+285,8 @@ ir3_setup_used_key(struct ir3_shader *shader) */ key->has_per_samp = true; + key->safe_constlen = true; + if (info->stage == MESA_SHADER_FRAGMENT) { key->fsaturate_s = ~0; key->fsaturate_t = ~0; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 353ce22b243..b2dbc64ce2e 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -299,6 +299,12 @@ struct ir3_shader_key { unsigned tessellation : 2; unsigned has_gs : 1; + + /* Whether this variant sticks to the "safe" maximum constlen, + * which guarantees that the combined stages will never go over + * the limit: + */ + unsigned safe_constlen : 1; }; uint32_t global; }; @@ -369,6 +375,9 @@ ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *las if (last_key->ucp_enables != key->ucp_enables) return true; + if (last_key->safe_constlen != key->safe_constlen) + return true; + return false; } @@ -391,6 +400,9 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *las if (last_key->ucp_enables != key->ucp_enables) return true; + if (last_key->safe_constlen != key->safe_constlen) + return true; + return false; } @@ -668,6 +680,25 @@ ir3_const_state(const struct ir3_shader_variant *v) return v->const_state; } +/* Given a variant, calculate the maximum constlen it can have. 
+ */ + +static inline unsigned +ir3_max_const(const struct ir3_shader_variant *v) +{ + const struct ir3_compiler *compiler = v->shader->compiler; + + if (v->shader->type == MESA_SHADER_COMPUTE) { + return compiler->max_const_compute; + } else if (v->key.safe_constlen) { + return compiler->max_const_safe; + } else if (v->shader->type == MESA_SHADER_FRAGMENT) { + return compiler->max_const_frag; + } else { + return compiler->max_const_geom; + } +} + void * ir3_shader_assemble(struct ir3_shader_variant *v); struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader, const struct ir3_shader_key *key, bool binning_pass, bool *created); -- 2.30.2