For shader-cache, we don't want anything important in `ir3_shader`.
And to have shader variants with lower const size limits (to properly
handle cross-stage limits), we also want variants to be able to have
their own const_state.

But we still need binning pass shaders to align with their draw pass
counterparts so that the same const emit can be used for both passes.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5508>
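The resulting ownership model: each draw-pass variant allocates and owns
its const_state, while a binning-pass variant allocates none and resolves
through its nonbinning pointer, so both variants of a pair see a single
const_state. A minimal sketch of the invariant this establishes (the
variables `dv` and `bv` are hypothetical; `ir3_const_state()` is the
accessor changed below):

    /* dv: draw-pass variant, bv: its binning-pass twin.  Both must
     * resolve to the same const_state so that one const emit is valid
     * for either pass.
     */
    assert(bv->binning_pass && bv->nonbinning == dv);
    assert(ir3_const_state(bv) == ir3_const_state(dv));
    assert(ir3_const_state(dv) == dv->const_state);  /* draw pass owns it */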
struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
shader->compiler = c;
shader->type = MESA_SHADER_COMPUTE;
- shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
mtx_init(&shader->variants_lock, mtx_plain);
struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
v->type = MESA_SHADER_COMPUTE;
v->shader = shader;
+ v->const_state = rzalloc_size(v, sizeof(*v->const_state));
shader->variants = v;
shader->variant_count = 1;
ir3_optimize_loop(s);
- /* do ubo load and idiv lowering after first opt loop to get a chance to
- * propagate constants for divide by immed power-of-two and constant ubo
- * block/offsets:
- *
- * NOTE that UBO analysis pass should only be done once, before variants
+ /* do idiv lowering after first opt loop to get a chance to propagate
+ * constants for divide by immed power-of-two:
*/
- const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
- /* UBO offset lowering has to come after we've decided what will be left as load_ubo */
- OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
- if (ubo_progress || idiv_progress)
+ if (idiv_progress)
ir3_optimize_loop(s);
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
}
nir_sweep(s);
-
- /* The first time thru, when not creating variant, do the one-time
- * const_state layout setup. This should be done after ubo range
- * analysis.
- */
- ir3_setup_const_state(shader, s, shader->const_state);
}
void
progress |= OPT(s, nir_lower_tex, &tex_options);
}
+ progress |= OPT(s, ir3_nir_analyze_ubo_ranges, so);
+
+ /* UBO offset lowering has to come after we've decided what will
+ * be left as load_ubo
+ */
+ OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
+
if (progress)
ir3_optimize_loop(s);
}
nir_sweep(s);
+
+ /* Binning pass variants re-use the const_state of the corresponding
+ * draw pass shader, so that the same const emit can be re-used for both
+ * passes:
+ */
+ if (!so->binning_pass)
+ ir3_setup_const_state(s, so, ir3_const_state(so));
}
static void
}
}
-/* Sets up the non-variant-dependent constant state for the ir3_shader. Note
+/* Sets up the variant-dependent constant state for the ir3_shader_variant. Note
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
* maximum number of driver params that would eventually be used, to leave
* space for this function to allocate the driver params.
*/
void
-ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
- struct ir3_const_state *const_state)
+ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
+ struct ir3_const_state *const_state)
{
- struct ir3_compiler *compiler = shader->compiler;
+ struct ir3_compiler *compiler = v->shader->compiler;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
ir3_nir_scan_driver_consts(nir, const_state);
if ((compiler->gpu_id < 500) &&
- (shader->stream_output.num_outputs > 0)) {
+ (v->shader->stream_output.num_outputs > 0)) {
const_state->num_driver_params =
MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
}
const_state->offsets.driver_param = constoff;
constoff += const_state->num_driver_params / 4;
- if ((shader->type == MESA_SHADER_VERTEX) &&
+ if ((v->type == MESA_SHADER_VERTEX) &&
(compiler->gpu_id < 500) &&
- shader->stream_output.num_outputs > 0) {
+ v->shader->stream_output.num_outputs > 0) {
const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
}
- switch (shader->type) {
+ switch (v->type) {
case MESA_SHADER_VERTEX:
const_state->offsets.primitive_param = constoff;
constoff += 1;
void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s);
void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
-void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
+void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state);
-
-bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader);
+bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
nir_ssa_def *
ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
}
bool
-ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
+ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
{
- struct ir3_const_state *const_state = shader->const_state;
+ struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+ struct ir3_compiler *compiler = v->shader->compiler;
memset(state, 0, sizeof(*state));
for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
nir_foreach_instr (instr, block) {
if (instr_is_load_ubo(instr))
gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
- state, shader->compiler->const_upload_unit);
+ state, compiler->const_upload_unit);
}
}
}
* be driver params but this pass usually eliminates them.
*/
struct ir3_const_state worst_case_const_state = { };
- ir3_setup_const_state(shader, nir, &worst_case_const_state);
- const uint32_t max_upload = (shader->compiler->max_const -
+ ir3_setup_const_state(nir, v, &worst_case_const_state);
+ const uint32_t max_upload = (compiler->max_const -
worst_case_const_state.offsets.immediate) * 16;
- uint32_t offset = shader->num_reserved_user_consts * 16;
+ uint32_t offset = v->shader->num_reserved_user_consts * 16;
state->num_enabled = ARRAY_SIZE(state->range);
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
if (state->range[i].start >= state->range[i].end) {
if (instr_is_load_ubo(instr))
lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
&builder, state, &num_ubos,
- shader->compiler->const_upload_unit);
+ compiler->const_upload_unit);
}
}
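As a units note for the bound computed above (an explanatory sketch, not
part of the patch; the concrete numbers are made up): const offsets are
counted in vec4 slots of 16 bytes each, so the budget for promoted UBO
ranges is whatever lies between the worst-case reserved layout and the
hardware const file limit:

    /* Hypothetical values: max_const = 512 vec4 regs, and the worst-case
     * layout places immediates at vec4 slot 64.  UBO data promoted to
     * push consts may then occupy at most:
     */
    uint32_t max_upload = (512 - 64) * 16;   /* = 7168 bytes */
    /* ...starting after the reserved user consts, also in 16-byte units: */
    uint32_t offset = num_reserved_user_consts * 16;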
v->mergedregs = false;
}
+ if (!v->binning_pass)
+ v->const_state = rzalloc_size(v, sizeof(*v->const_state));
+
ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
if (stream_output)
memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output));
shader->num_reserved_user_consts = reserved_user_consts;
- shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
if (nir->info.stage == MESA_SHADER_GEOMETRY)
NIR_PASS_V(nir, ir3_nir_lower_gs);
*/
unsigned constlen;
+ struct ir3_const_state *const_state;
+
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
struct ir3_compiler *compiler;
- struct ir3_const_state *const_state;
unsigned num_reserved_user_consts;
struct nir_shader *nir;
struct ir3_shader_key key_mask;
};
+/**
+ * In order to use the same cmdstream, in particular constlen setup and const
+ * emit, for both binning and draw passes (a6xx+), the binning pass re-uses its
+ * corresponding draw pass shader's const_state.
+ */
static inline struct ir3_const_state *
ir3_const_state(const struct ir3_shader_variant *v)
{
- return v->shader->const_state;
+ if (v->binning_pass)
+ return v->nonbinning->const_state;
+ return v->const_state;
}
void * ir3_shader_assemble(struct ir3_shader_variant *v);
bool layered_clear)
{
struct ir3_const_state dummy_const_state = {};
- struct ir3_shader dummy_shader = {
- .const_state = &dummy_const_state,
- };
+ struct ir3_shader dummy_shader = {};
struct ir3_shader_variant vs = {
.type = MESA_SHADER_VERTEX,
.regid = regid(1, 0),
},
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
};
if (layered_clear) {
vs = (struct ir3_shader_variant) {
.instrlen = 1,
.info.max_reg = 0,
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
};
}
.cmd = 4,
},
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
};
struct ir3_shader_variant gs_shader = {
.regid = regid(1, 0),
},
.shader = &dummy_shader,
+ .const_state = &dummy_const_state,
}, *gs = layered_clear ? &gs_shader : NULL;