From: Rob Clark
Date: Mon, 15 Jun 2020 19:14:04 +0000 (-0700)
Subject: freedreno/ir3: move const_state back to variant
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=640ff0e847f8b4e8799499b0c6c31b194ab5e468;p=mesa.git

freedreno/ir3: move const_state back to variant

For the shader-cache, we do not want to keep anything important in
`ir3_shader`.  And to let shader variants have lower const size limits
(needed to properly handle cross-stage limits), variants also need to
be able to own their const_state.

But binning pass shaders still need to align with their draw pass
counterpart, so that the same const emit can be used for both passes.

Signed-off-by: Rob Clark
Part-of:
---

diff --git a/src/freedreno/ir3/ir3_assembler.c b/src/freedreno/ir3/ir3_assembler.c
index e81eda6e9ae..6dee9475da5 100644
--- a/src/freedreno/ir3/ir3_assembler.c
+++ b/src/freedreno/ir3/ir3_assembler.c
@@ -37,12 +37,12 @@ ir3_parse_asm(struct ir3_compiler *c, struct ir3_kernel_info *info, FILE *in)
 	struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));
 	shader->compiler = c;
 	shader->type = MESA_SHADER_COMPUTE;
-	shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
 	mtx_init(&shader->variants_lock, mtx_plain);
 
 	struct ir3_shader_variant *v = rzalloc_size(shader, sizeof(*v));
 	v->type = MESA_SHADER_COMPUTE;
 	v->shader = shader;
+	v->const_state = rzalloc_size(v, sizeof(*v->const_state));
 
 	shader->variants = v;
 	shader->variant_count = 1;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index de76cb70e55..a73210e5cf2 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -249,18 +249,12 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 
 	ir3_optimize_loop(s);
 
-	/* do ubo load and idiv lowering after first opt loop to get a chance to
-	 * propagate constants for divide by immed power-of-two and constant ubo
-	 * block/offsets:
-	 *
-	 * NOTE that UBO analysis pass should only be done once, before variants
+	/* do idiv lowering after first opt loop to get a chance to propagate
+	 * constants for divide by immed power-of-two:
 	 */
-	const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
 	const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
-	/* UBO offset lowering has to come after we've decided what will be left as load_ubo */
-	OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
 
-	if (ubo_progress || idiv_progress)
+	if (idiv_progress)
 		ir3_optimize_loop(s);
 
 	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
@@ -272,12 +266,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 	}
 
 	nir_sweep(s);
-
-	/* The first time thru, when not creating variant, do the one-time
-	 * const_state layout setup.  This should be done after ubo range
-	 * analysis.
-	 */
-	ir3_setup_const_state(shader, s, shader->const_state);
 }
 
 void
@@ -356,6 +344,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 		progress |= OPT(s, nir_lower_tex, &tex_options);
 	}
 
+	progress |= OPT(s, ir3_nir_analyze_ubo_ranges, so);
+
+	/* UBO offset lowering has to come after we've decided what will
+	 * be left as load_ubo
+	 */
+	OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
+
 	if (progress)
 		ir3_optimize_loop(s);
 
@@ -382,6 +377,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 	}
 
 	nir_sweep(s);
+
+	/* Binning pass variants re-use the const_state of the corresponding
+	 * draw pass shader, so that the same const emit can be re-used for
+	 * both passes:
+	 */
+	if (!so->binning_pass)
+		ir3_setup_const_state(s, so, ir3_const_state(so));
 }
 
 static void
@@ -460,23 +462,23 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
 	}
 }
 
-/* Sets up the non-variant-dependent constant state for the ir3_shader.  Note
+/* Sets up the variant-dependent constant state for the ir3_shader.  Note
  * that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
  * maximum number of driver params that would eventually be used, to leave
  * space for this function to allocate the driver params.
  */
 void
-ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
-		struct ir3_const_state *const_state)
+ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
+		struct ir3_const_state *const_state)
 {
-	struct ir3_compiler *compiler = shader->compiler;
+	struct ir3_compiler *compiler = v->shader->compiler;
 
 	memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
 
 	ir3_nir_scan_driver_consts(nir, const_state);
 
 	if ((compiler->gpu_id < 500) &&
-			(shader->stream_output.num_outputs > 0)) {
+			(v->shader->stream_output.num_outputs > 0)) {
 		const_state->num_driver_params =
 			MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
 	}
@@ -511,14 +513,14 @@ ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
 		const_state->offsets.driver_param = constoff;
 	constoff += const_state->num_driver_params / 4;
 
-	if ((shader->type == MESA_SHADER_VERTEX) &&
+	if ((v->type == MESA_SHADER_VERTEX) &&
 			(compiler->gpu_id < 500) &&
-			shader->stream_output.num_outputs > 0) {
+			v->shader->stream_output.num_outputs > 0) {
 		const_state->offsets.tfbo = constoff;
 		constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
 	}
 
-	switch (shader->type) {
+	switch (v->type) {
 	case MESA_SHADER_VERTEX:
 		const_state->offsets.primitive_param = constoff;
 		constoff += 1;
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 67c56f931d7..0ad9b35f5fe 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -55,10 +55,9 @@ const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler
 void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s);
 void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
-void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
+void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
 		struct ir3_const_state *const_state);
-
-bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader);
+bool ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
 
 nir_ssa_def *
 ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_ssa_def *offset, int32_t shift);
 
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 0ae11a3512a..d14e5499445 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -302,10 +302,11 @@ instr_is_load_ubo(nir_instr *instr)
 }
 
 bool
-ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
+ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
 {
-	struct ir3_const_state *const_state = shader->const_state;
+	struct ir3_const_state *const_state = ir3_const_state(v);
 	struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
+	struct ir3_compiler *compiler = v->shader->compiler;
 
 	memset(state, 0, sizeof(*state));
 	for (int i = 0; i < IR3_MAX_UBO_PUSH_RANGES; i++) {
@@ -318,7 +319,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			nir_foreach_instr (instr, block) {
 				if (instr_is_load_ubo(instr))
 					gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
-							state, shader->compiler->const_upload_unit);
+							state, compiler->const_upload_unit);
 			}
 		}
 	}
@@ -340,11 +341,11 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 	 * be driver params but this pass usually eliminates them.
 	 */
 	struct ir3_const_state worst_case_const_state = { };
-	ir3_setup_const_state(shader, nir, &worst_case_const_state);
-	const uint32_t max_upload = (shader->compiler->max_const -
+	ir3_setup_const_state(nir, v, &worst_case_const_state);
+	const uint32_t max_upload = (compiler->max_const -
 			worst_case_const_state.offsets.immediate) * 16;
 
-	uint32_t offset = shader->num_reserved_user_consts * 16;
+	uint32_t offset = v->shader->num_reserved_user_consts * 16;
 	state->num_enabled = ARRAY_SIZE(state->range);
 	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
 		if (state->range[i].start >= state->range[i].end) {
@@ -375,7 +376,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			if (instr_is_load_ubo(instr))
 				lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
 						&builder, state, &num_ubos,
-						shader->compiler->const_upload_unit);
+						compiler->const_upload_unit);
 		}
 	}
 
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 66f72c98dc1..bb5dfa6343f 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -205,6 +205,9 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
 		v->mergedregs = false;
 	}
 
+	if (!v->binning_pass)
+		v->const_state = rzalloc_size(v, sizeof(*v->const_state));
+
 	ret = ir3_compile_shader_nir(shader->compiler, v);
 	if (ret) {
 		debug_error("compile failed!");
@@ -345,7 +348,6 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
 	if (stream_output)
 		memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output));
 	shader->num_reserved_user_consts = reserved_user_consts;
-	shader->const_state = rzalloc_size(shader, sizeof(*shader->const_state));
 
 	if (nir->info.stage == MESA_SHADER_GEOMETRY)
 		NIR_PASS_V(nir, ir3_nir_lower_gs);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index ff131cc1838..41ca394837b 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -468,6 +468,8 @@ struct ir3_shader_variant {
 	 */
 	unsigned constlen;
 
+	struct ir3_const_state *const_state;
+
 	/* About Linkage:
 	 *   + Let the frag shader determine the position/compmask for the
 	 *     varyings, since it is the place where we know if the varying
@@ -621,7 +623,6 @@ struct ir3_shader {
 	struct ir3_compiler *compiler;
 
-	struct ir3_const_state *const_state;
 	unsigned num_reserved_user_consts;
 
 	struct nir_shader *nir;
@@ -641,10 +642,17 @@ struct ir3_shader {
 	struct ir3_shader_key key_mask;
 };
 
+/**
+ * In order to use the same cmdstream, in particular constlen setup and const
+ * emit, for both binning and draw pass (a6xx+), the binning pass re-uses its
+ * corresponding draw pass shader's const_state.
+ */
 static inline struct ir3_const_state *
 ir3_const_state(const struct ir3_shader_variant *v)
 {
-	return v->shader->const_state;
+	if (v->binning_pass)
+		return v->nonbinning->const_state;
+	return v->const_state;
 }
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v);
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index e6e7daf1bfe..c452d7e8f05 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -322,9 +322,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
            bool layered_clear)
 {
    struct ir3_const_state dummy_const_state = {};
-   struct ir3_shader dummy_shader = {
-      .const_state = &dummy_const_state,
-   };
+   struct ir3_shader dummy_shader = {};
 
    struct ir3_shader_variant vs = {
       .type = MESA_SHADER_VERTEX,
@@ -347,6 +345,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
          .regid = regid(1, 0),
       },
       .shader = &dummy_shader,
+      .const_state = &dummy_const_state,
   };
   if (layered_clear) {
      vs = (struct ir3_shader_variant) {
@@ -354,6 +353,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .instrlen = 1,
         .info.max_reg = 0,
         .shader = &dummy_shader,
+        .const_state = &dummy_const_state,
      };
   }
 
@@ -383,6 +383,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .cmd = 4,
      },
      .shader = &dummy_shader,
+     .const_state = &dummy_const_state,
   };
 
   struct ir3_shader_variant gs_shader = {
@@ -410,6 +411,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
         .regid = regid(1, 0),
      },
      .shader = &dummy_shader,
+     .const_state = &dummy_const_state,
   }, *gs = layered_clear ? &gs_shader : NULL;
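
For readers outside the freedreno tree, the following stand-alone sketch illustrates
the ownership model this patch establishes (simplified types and names such as
shader_variant and get_const_state are placeholders, not mesa code): each draw pass
variant owns a const_state, the binning variant only points back at its non-binning
counterpart, and every lookup goes through one accessor so both passes see the same
const layout.

#include <assert.h>
#include <stdlib.h>

/* Simplified stand-ins for ir3_const_state / ir3_shader_variant. */
struct const_state {
	unsigned num_driver_params;
};

struct shader_variant {
	int binning_pass;                  /* true for the binning variant */
	struct shader_variant *nonbinning; /* draw pass counterpart, if binning */
	struct const_state *const_state;   /* owned only by non-binning variants */
};

/* Mirrors the ir3_const_state() accessor added above: a binning variant
 * resolves to its draw pass counterpart's const_state, so both passes see
 * the same const layout and the same const emit cmdstream can be re-used. */
static struct const_state *
get_const_state(const struct shader_variant *v)
{
	if (v->binning_pass)
		return v->nonbinning->const_state;
	return v->const_state;
}

int main(void)
{
	struct shader_variant draw = {
		.const_state = calloc(1, sizeof(struct const_state)),
	};
	struct shader_variant binning = {
		.binning_pass = 1,
		.nonbinning = &draw,
	};

	/* Both variants resolve to the single layout owned by the draw variant. */
	assert(get_const_state(&binning) == get_const_state(&draw));

	free(draw.const_state);
	return 0;
}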