From 65660622a1f7284c457388e553867b239343d314 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Wed, 10 Jun 2020 11:11:27 +0200
Subject: [PATCH] ir3: Split out variant-specific lowering and optimizations

It seems a lot of the lowerings being run the second time were
unnecessary. In addition, when const_state is moved to the variant,
then it will become impossible to know ahead of time whether a variant
needs additional optimizing, which means that ir3_key_lowers_nir() needs
to go away. The new approach should have the same effect, since it skips
running lowerings that are unnecessary and then skips the opt loop if no
optimizations made progress, but it will work better when we move
ir3_nir_analyze_ubo_ranges() to be after variant creation.

The one possibly controversial change is that nir_opt_algebraic_late()
now always runs during variant lowering. I wanted to avoid code
duplication, and it seems to me that we should run the _late variants
as late as possible so that later opt_algebraic runs don't miss out on
optimization opportunities.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5508>
---
 src/freedreno/ir3/ir3_context.c               |   3 +-
 src/freedreno/ir3/ir3_nir.c                   | 191 ++++++++++--------
 src/freedreno/ir3/ir3_nir.h                   |   5 +-
 src/freedreno/ir3/ir3_shader.c                |   2 +-
 .../drivers/freedreno/ir3/ir3_cmdline.c       |   4 +-
 5 files changed, 109 insertions(+), 96 deletions(-)

diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index b9905625b70..522c6f69aca 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -74,8 +74,7 @@ ir3_context_init(struct ir3_compiler *compiler,
 	 */
 
 	ctx->s = nir_shader_clone(ctx, so->shader->nir);
-	if (ir3_key_lowers_nir(&so->key))
-		ir3_optimize_nir(so->shader, ctx->s, &so->key);
+	ir3_nir_lower_variant(so, ctx->s);
 
 	/* this needs to be the last pass run, so do this here instead of
 	 * in ir3_optimize_nir():
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 0afe624a355..2df5715f51c 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -123,17 +123,6 @@ ir3_get_compiler_options(struct ir3_compiler *compiler)
 	return &options;
 }
 
-/* for given shader key, are any steps handled in nir? */
-bool
-ir3_key_lowers_nir(const struct ir3_shader_key *key)
-{
-	return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r |
-			key->vsaturate_s | key->vsaturate_t | key->vsaturate_r |
-			key->ucp_enables | key->color_two_side |
-			key->fclamp_color | key->vclamp_color |
-			key->tessellation | key->has_gs;
-}
-
 #define OPT(nir, pass, ...) ({                             \
    bool this_progress = false;                             \
    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
@@ -224,54 +213,13 @@ should_split_wrmask(const nir_instr *instr, const void *data)
 }
 
 void
-ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
-		const struct ir3_shader_key *key)
+ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
 {
 	struct nir_lower_tex_options tex_options = {
 			.lower_rect = 0,
 			.lower_tg4_offsets = true,
 	};
 
-	if (key && (key->has_gs || key->tessellation)) {
-		switch (shader->type) {
-		case MESA_SHADER_VERTEX:
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, shader, key->tessellation);
-			break;
-		case MESA_SHADER_TESS_CTRL:
-			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, shader, key->tessellation);
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
-			break;
-		case MESA_SHADER_TESS_EVAL:
-			NIR_PASS_V(s, ir3_nir_lower_tess_eval, key->tessellation);
-			if (key->has_gs)
-				NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, shader, key->tessellation);
-			break;
-		case MESA_SHADER_GEOMETRY:
-			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
-			break;
-		default:
-			break;
-		}
-	}
-
-	if (key) {
-		switch (shader->type) {
-		case MESA_SHADER_FRAGMENT:
-			tex_options.saturate_s = key->fsaturate_s;
-			tex_options.saturate_t = key->fsaturate_t;
-			tex_options.saturate_r = key->fsaturate_r;
-			break;
-		case MESA_SHADER_VERTEX:
-			tex_options.saturate_s = key->vsaturate_s;
-			tex_options.saturate_t = key->vsaturate_t;
-			tex_options.saturate_r = key->vsaturate_r;
-			break;
-		default:
-			/* TODO */
-			break;
-		}
-	}
-
 	if (shader->compiler->gpu_id >= 400) {
 		/* a4xx seems to have *no* sam.p */
 		tex_options.lower_txp = ~0;  /* lower all txp */
@@ -289,31 +237,10 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	OPT_V(s, nir_lower_regs_to_ssa);
 	OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s);
 
-	if (key) {
-		if (s->info.stage == MESA_SHADER_VERTEX) {
-			OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false, false, NULL);
-			if (key->vclamp_color)
-				OPT_V(s, nir_lower_clamp_color_outputs);
-		} else if (s->info.stage == MESA_SHADER_FRAGMENT) {
-			OPT_V(s, nir_lower_clip_fs, key->ucp_enables, false);
-			if (key->fclamp_color)
-				OPT_V(s, nir_lower_clamp_color_outputs);
-		}
-		if (key->color_two_side) {
-			OPT_V(s, nir_lower_two_sided_color);
-		}
-	} else {
-		/* only want to do this the first time (when key is null)
-		 * and not again on any potential 2nd variant lowering pass:
-		 */
-		OPT_V(s, ir3_nir_apply_trig_workarounds);
-
-		/* This wouldn't hurt to run multiple times, but there is
-		 * no need to:
-		 */
-		if (shader->type == MESA_SHADER_FRAGMENT)
-			OPT_V(s, nir_lower_fb_read);
-	}
+	OPT_V(s, ir3_nir_apply_trig_workarounds);
+
+	if (shader->type == MESA_SHADER_FRAGMENT)
+		OPT_V(s, nir_lower_fb_read);
 
 	OPT_V(s, nir_lower_tex, &tex_options);
 	OPT_V(s, nir_lower_load_const_to_scalar);
@@ -328,7 +255,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	 *
 	 * NOTE that UBO analysis pass should only be done once, before variants
 	 */
-	const bool ubo_progress = !key && OPT(s, ir3_nir_analyze_ubo_ranges, shader);
+	const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
 	const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
 	/* UBO offset lowering has to come after we've decided what will be left as load_ubo */
 	OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
@@ -336,6 +263,102 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	if (ubo_progress || idiv_progress)
 		ir3_optimize_loop(s);
 
+	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
+
+	if (ir3_shader_debug & IR3_DBG_DISASM) {
+		debug_printf("----------------------\n");
+		nir_print_shader(s, stdout);
+		debug_printf("----------------------\n");
+	}
+
+	nir_sweep(s);
+
+	/* Variants are lowered separately in ir3_nir_lower_variant(), so
+	 * this is the place for the one-time const_state layout setup.
+	 * This should be done after ubo range analysis.
+	 */
+	ir3_setup_const_state(shader, s, &shader->const_state);
+}
+
+void
+ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
+{
+	if (ir3_shader_debug & IR3_DBG_DISASM) {
+		debug_printf("----------------------\n");
+		nir_print_shader(s, stdout);
+		debug_printf("----------------------\n");
+	}
+
+	bool progress = false;
+
+	if (so->key.has_gs || so->key.tessellation) {
+		switch (so->shader->type) {
+		case MESA_SHADER_VERTEX:
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+			progress = true;
+			break;
+		case MESA_SHADER_TESS_CTRL:
+			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so->shader, so->key.tessellation);
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+			progress = true;
+			break;
+		case MESA_SHADER_TESS_EVAL:
+			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
+			if (so->key.has_gs)
+				NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+			progress = true;
+			break;
+		case MESA_SHADER_GEOMETRY:
+			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
+			progress = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (s->info.stage == MESA_SHADER_VERTEX) {
+		if (so->key.ucp_enables)
+			progress |= OPT(s, nir_lower_clip_vs, so->key.ucp_enables, false, false, NULL);
+		if (so->key.vclamp_color)
+			progress |= OPT(s, nir_lower_clamp_color_outputs);
+	} else if (s->info.stage == MESA_SHADER_FRAGMENT) {
+		if (so->key.ucp_enables)
+			progress |= OPT(s, nir_lower_clip_fs, so->key.ucp_enables, false);
+		if (so->key.fclamp_color)
+			progress |= OPT(s, nir_lower_clamp_color_outputs);
+	}
+	if (so->key.color_two_side) {
+		OPT_V(s, nir_lower_two_sided_color);
+		progress = true;
+	}
+
+	struct nir_lower_tex_options tex_options = { };
+
+	switch (so->shader->type) {
+	case MESA_SHADER_FRAGMENT:
+		tex_options.saturate_s = so->key.fsaturate_s;
+		tex_options.saturate_t = so->key.fsaturate_t;
+		tex_options.saturate_r = so->key.fsaturate_r;
+		break;
+	case MESA_SHADER_VERTEX:
+		tex_options.saturate_s = so->key.vsaturate_s;
+		tex_options.saturate_t = so->key.vsaturate_t;
+		tex_options.saturate_r = so->key.vsaturate_r;
+		break;
+	default:
+		/* TODO */
+		break;
+	}
+
+	if (tex_options.saturate_s || tex_options.saturate_t ||
+		tex_options.saturate_r) {
+		progress |= OPT(s, nir_lower_tex, &tex_options);
+	}
+
+	if (progress)
+		ir3_optimize_loop(s);
+
 	/* Do late algebraic optimization to turn add(a, neg(b)) back into
 	* subs, then the mandatory cleanup after algebraic.  Note that it may
 	* produce fnegs, and if so then we need to keep running to squash
@@ -350,8 +373,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 		OPT_V(s, nir_opt_cse);
 	}
 
-	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
-
 	OPT_V(s, nir_opt_sink, nir_move_const_undef);
 
 	if (ir3_shader_debug & IR3_DBG_DISASM) {
@@ -361,14 +382,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	}
 
 	nir_sweep(s);
-
-	/* The first time thru, when not creating variant, do the one-time
-	 * const_state layout setup.  This should be done after ubo range
-	 * analysis.
-	 */
-	if (!key) {
-		ir3_setup_const_state(shader, s, &shader->const_state);
-	}
 }
 
 static void
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 39930a40778..67c56f931d7 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -52,9 +52,8 @@ void ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology);
 void ir3_nir_lower_gs(nir_shader *shader);
 
 const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
-bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
-void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
-		const struct ir3_shader_key *key);
+void ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s);
+void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
 
 void ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
 		struct ir3_const_state *const_state);
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 4bd427b4dc8..78ed751a3f2 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -391,7 +391,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
 	NIR_PASS_V(nir, nir_lower_amul, ir3_glsl_type_size);
 
 	/* do first pass optimization, ignoring the key: */
-	ir3_optimize_nir(shader, nir, NULL);
+	ir3_optimize_nir(shader, nir);
 
 	shader->nir = nir;
 	if (ir3_shader_debug & IR3_DBG_DISASM) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index a16c06872c3..cd9950436c5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -491,12 +491,14 @@ int main(int argc, char **argv)
 	s.compiler = compiler;
 	s.nir = nir;
 
-	ir3_optimize_nir(&s, nir, NULL);
+	ir3_optimize_nir(&s, nir);
 
 	v.key = key;
 	v.shader = &s;
 	s.type = v.type = nir->info.stage;
 
+	ir3_nir_lower_variant(&v, nir);
+
 	info = "NIR compiler";
 	ret = ir3_compile_shader_nir(s.compiler, &v);
 	if (ret) {
-- 
2.30.2