From: Rob Clark <robdclark@chromium.org>
Date: Fri, 2 Aug 2019 21:07:47 +0000 (-0700)
Subject: freedreno/ir3+a6xx: same VBO state for draw/binning
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=882d53d8e36592a39cde947e890969a81b2b1226;p=mesa.git

freedreno/ir3+a6xx: same VBO state for draw/binning

Worth ~+20% on gl_driver2

Signed-off-by: Rob Clark <robdclark@chromium.org>
---

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index cbbb9bb61b0..872a6fb0fc2 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1081,7 +1081,7 @@ void ir3_a6xx_fixup_atomic_dests(struct ir3 *ir, struct ir3_shader_variant *so);
 
 /* register assignment: */
 struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
-int ir3_ra(struct ir3 *ir3);
+int ir3_ra(struct ir3_shader_variant *v);
 
 /* legalize: */
 void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index dca55f33b38..3f4a0f43c99 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2906,6 +2906,32 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 	if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
 		fixup_binning_pass(ctx);
 
+	/* for a6xx+, binning and draw pass VS use same VBO state, so we
+	 * need to make sure not to remove any inputs that are used by
+	 * the nonbinning VS.
+	 */
+	if (ctx->compiler->gpu_id >= 600 && so->binning_pass) {
+		debug_assert(so->type == MESA_SHADER_VERTEX);
+		for (int i = 0; i < ir->ninputs; i++) {
+			struct ir3_instruction *in = ir->inputs[i];
+
+			if (!in)
+				continue;
+
+			unsigned n = i / 4;
+			unsigned c = i % 4;
+
+			debug_assert(n < so->nonbinning->inputs_count);
+
+			if (so->nonbinning->inputs[n].sysval)
+				continue;
+
+			/* be sure to keep inputs, even if only used in VS */
+			if (so->nonbinning->inputs[n].compmask & (1 << c))
+				array_insert(in->block, in->block->keeps, in);
+		}
+	}
+
 	/* Insert mov if there's same instruction for each output.
 	 * eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
 	 */
@@ -2962,7 +2988,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		ir3_print(ir);
 	}
 
-	ret = ir3_ra(ir);
+	ret = ir3_ra(so);
 	if (ret) {
 		DBG("RA failed!");
 		goto out;
@@ -3003,13 +3029,17 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
 		for (j = 0; j < 4; j++) {
 			struct ir3_instruction *in = inputs[(i*4) + j];
 
-			if (in && !(in->flags & IR3_INSTR_UNUSED)) {
-				reg = in->regs[0]->num - j;
-				if (half) {
-					compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
-				} else {
-					half = !!(in->regs[0]->flags & IR3_REG_HALF);
-				}
+			if (!in)
+				continue;
+
+			if (in->flags & IR3_INSTR_UNUSED)
+				continue;
+
+			reg = in->regs[0]->num - j;
+			if (half) {
+				compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+			} else {
+				half = !!(in->regs[0]->flags & IR3_REG_HALF);
 			}
 		}
 		so->inputs[i].regid = reg;
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 980cd62c48b..a641661a441 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -330,6 +330,7 @@ struct ir3_ra_instr_data {
 
 /* register-assign context, per-shader */
 struct ir3_ra_ctx {
+	struct ir3_shader_variant *v;
 	struct ir3 *ir;
 
 	struct ir3_ra_reg_set *set;
@@ -1091,6 +1092,60 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static int
 ra_alloc(struct ir3_ra_ctx *ctx)
 {
+	/* Pre-assign VS inputs on a6xx+ binning pass shader, to align
+	 * with draw pass VS, so binning and draw pass can both use the
+	 * same VBO state.
+	 *
+	 * Note that VS inputs are expected to be full precision.
+	 */
+	bool pre_assign_inputs = (ctx->ir->compiler->gpu_id >= 600) &&
+			(ctx->ir->type == MESA_SHADER_VERTEX) &&
+			ctx->v->binning_pass;
+
+	if (pre_assign_inputs) {
+		for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+			struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+			if (!instr)
+				continue;
+
+			debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
+
+			struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+			/* only consider the first component: */
+			if (id->off > 0)
+				continue;
+
+			unsigned name = ra_name(ctx, id);
+
+			unsigned n = i / 4;
+			unsigned c = i % 4;
+
+			/* 'base' is in scalar (class 0) but we need to map that to
+			 * the conflicting register of the appropriate class (ie.
+			 * input could be vec2/vec3/etc)
+			 *
+			 * Note that the higher class (larger than scalar) regs
+			 * are setup to conflict with others in the same class,
+			 * so for example, R1 (scalar) is also the first component
+			 * of D1 (vec2/double):
+			 *
+			 *    Single (base) |  Double
+			 *    --------------+---------------
+			 *       R0         |  D0
+			 *       R1         |  D0 D1
+			 *       R2         |     D1 D2
+			 *       R3         |        D2
+			 *           .. and so on..
+			 */
+			unsigned reg = ctx->set->gpr_to_ra_reg[id->cls]
+					[ctx->v->nonbinning->inputs[n].regid + c];
+
+			ra_set_node_reg(ctx->g, name, reg);
+		}
+	}
+
 	/* pre-assign array elements:
 	 */
 	list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
@@ -1118,6 +1173,35 @@ retry:
 			}
 		}
 
+		/* also need to not conflict with any pre-assigned inputs: */
+		if (pre_assign_inputs) {
+			for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+				struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+				if (!instr)
+					continue;
+
+				struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+				/* only consider the first component: */
+				if (id->off > 0)
+					continue;
+
+				unsigned name = ra_name(ctx, id);
+
+				/* Check if array intersects with liverange AND register
+				 * range of the input:
+				 */
+				if (intersects(arr->start_ip, arr->end_ip,
+						ctx->def[name], ctx->use[name]) &&
+					intersects(base, base + arr->length,
+						i, i + class_sizes[id->cls])) {
+					base = MAX2(base, i + class_sizes[id->cls]);
+					goto retry;
+				}
+			}
+		}
+
 		arr->reg = base;
 
 		for (unsigned i = 0; i < arr->length; i++) {
@@ -1140,11 +1224,12 @@ retry:
 	return 0;
 }
 
-int ir3_ra(struct ir3 *ir)
+int ir3_ra(struct ir3_shader_variant *v)
 {
 	struct ir3_ra_ctx ctx = {
-			.ir = ir,
-			.set = ir->compiler->set,
+			.v = v,
+			.ir = v->ir,
+			.set = v->ir->compiler->set,
 	};
 	int ret;
 
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 7c686f0ee2a..aae7baeb2e0 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -178,9 +178,14 @@ assemble_variant(struct ir3_shader_variant *v)
 	v->ir = NULL;
 }
 
+/*
+ * For creating normal shader variants, 'nonbinning' is NULL.  For
+ * creating a binning pass shader, it is a link to the corresponding
+ * normal (non-binning) variant.
+ */
 static struct ir3_shader_variant *
 create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
-		bool binning_pass)
+		struct ir3_shader_variant *nonbinning)
 {
 	struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
 	int ret;
@@ -190,7 +195,8 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
 	v->id = ++shader->variant_count;
 	v->shader = shader;
-	v->binning_pass = binning_pass;
+	v->binning_pass = !!nonbinning;
+	v->nonbinning = nonbinning;
 	v->key = *key;
 	v->type = shader->type;
 
@@ -226,7 +232,7 @@ shader_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 			return v;
 
 	/* compile new variant if it doesn't exist already: */
-	v = create_variant(shader, key, false);
+	v = create_variant(shader, key, NULL);
 	if (v) {
 		v->next = shader->variants;
 		shader->variants = v;
@@ -246,7 +252,7 @@ ir3_shader_get_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
 
 	if (v && binning_pass) {
 		if (!v->binning) {
-			v->binning = create_variant(shader, key, true);
+			v->binning = create_variant(shader, key, v);
 			*created = true;
 		}
 		mtx_unlock(&shader->variants_lock);
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 53889c7f2ed..f6896c3526b 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -391,7 +391,10 @@ struct ir3_shader_variant {
 	 * which is pointed to by so->binning:
 	 */
 	bool binning_pass;
-	struct ir3_shader_variant *binning;
+//	union {
+		struct ir3_shader_variant *binning;
+		struct ir3_shader_variant *nonbinning;
+//	};
 
 	struct ir3_info info;
 	struct ir3 *ir;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index ef584177d16..59e0a9780e0 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -791,10 +791,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
 		struct fd_ringbuffer *state;
 
 		state = build_vbo_state(emit, emit->vs);
-		fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x6);
-
-		state = build_vbo_state(emit, emit->bs);
-		fd6_emit_take_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1);
+		fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x7);
 	}
 
 	if (dirty & FD_DIRTY_ZSA) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index 2ffb76c3900..bc66884fb5a 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -49,7 +49,6 @@ enum fd6_state_id {
 	FD6_GROUP_LRZ,
 	FD6_GROUP_LRZ_BINNING,
 	FD6_GROUP_VBO,
-	FD6_GROUP_VBO_BINNING,
 	FD6_GROUP_VS_CONST,
 	FD6_GROUP_FS_CONST,
 	FD6_GROUP_VS_TEX,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 3aa91c312b3..a2acaa7b5c2 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -703,6 +703,14 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
 	state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 	state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
 
+#ifdef DEBUG
+	for (unsigned i = 0; i < bs->inputs_count; i++) {
+		if (vs->inputs[i].sysval)
+			continue;
+		debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
+	}
+#endif
+
 	setup_config_stateobj(state->config_stateobj, state);
 	setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
 	setup_stateobj(state->stateobj, ctx->screen, state, key, false);