diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 8c7d9a33f3a..c389f750bd5 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -24,8 +24,6 @@
  * Rob Clark
  */
 
-#include "util/u_math.h"
-
 #include "ir3_compiler.h"
 #include "ir3_context.h"
 #include "ir3_image.h"
@@ -73,19 +71,31 @@ ir3_context_init(struct ir3_compiler *compiler,
 	 * creating duplicate variants..
 	 */
 
-	if (ir3_key_lowers_nir(&so->key)) {
-		nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
-		ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
-	} else {
-		/* fast-path for shader key that lowers nothing in NIR: */
-		ctx->s = nir_shader_clone(ctx, so->shader->nir);
-	}
+	ctx->s = nir_shader_clone(ctx, so->shader->nir);
+	if (ir3_key_lowers_nir(&so->key))
+		ir3_optimize_nir(so->shader, ctx->s, &so->key);
 
 	/* this needs to be the last pass run, so do this here instead of
 	 * in ir3_optimize_nir():
 	 */
 	NIR_PASS_V(ctx->s, nir_lower_bool_to_int32);
 	NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
+
+	/* We want to lower nir_op_imul as late as possible, to also catch
+	 * those generated by earlier passes (e.g., nir_lower_locals_to_regs).
+	 * However, we want a final swing of a few passes to have a chance
+	 * at optimizing the result.
+	 */
+	bool progress = false;
+	NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
+	if (progress) {
+		NIR_PASS_V(ctx->s, nir_opt_algebraic);
+		NIR_PASS_V(ctx->s, nir_opt_copy_prop_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dead_write_vars);
+		NIR_PASS_V(ctx->s, nir_opt_dce);
+		NIR_PASS_V(ctx->s, nir_opt_constant_folding);
+	}
+
 	NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
 
 	if (ir3_shader_debug & IR3_DBG_DISASM) {
@@ -101,73 +111,8 @@ ir3_context_init(struct ir3_compiler *compiler,
 		nir_print_shader(ctx->s, stderr);
 	}
 
-	ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
-
-	so->num_uniforms = ctx->s->num_uniforms;
-	so->num_ubos = ctx->s->info.num_ubos;
 	ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
 
-	/* Layout of constant registers, each section aligned to vec4.  Note
-	 * that pointer size (ubo, etc) changes depending on generation.
-	 *
-	 *    user consts
-	 *    UBO addresses
-	 *    SSBO sizes
-	 *    if (vertex shader) {
-	 *        driver params (IR3_DP_*)
-	 *        if (stream_output.num_outputs > 0)
-	 *           stream-out addresses
-	 *    }
-	 *    immediates
-	 *
-	 * Immediates go last mostly because they are inserted in the CP pass
-	 * after the nir -> ir3 frontend.
-	 *
-	 * Note UBO size in bytes should be aligned to vec4
-	 */
-	debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
-	unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
-	unsigned ptrsz = ir3_pointer_size(ctx->compiler);
-
-	memset(&so->constbase, ~0, sizeof(so->constbase));
-
-	if (so->num_ubos > 0) {
-		so->constbase.ubo = constoff;
-		constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
-	}
-
-	if (so->const_layout.ssbo_size.count > 0) {
-		unsigned cnt = so->const_layout.ssbo_size.count;
-		so->constbase.ssbo_sizes = constoff;
-		constoff += align(cnt, 4) / 4;
-	}
-
-	if (so->const_layout.image_dims.count > 0) {
-		unsigned cnt = so->const_layout.image_dims.count;
-		so->constbase.image_dims = constoff;
-		constoff += align(cnt, 4) / 4;
-	}
-
-	unsigned num_driver_params = 0;
-	if (so->type == MESA_SHADER_VERTEX) {
-		num_driver_params = IR3_DP_VS_COUNT;
-	} else if (so->type == MESA_SHADER_COMPUTE) {
-		num_driver_params = IR3_DP_CS_COUNT;
-	}
-
-	so->constbase.driver_param = constoff;
-	constoff += align(num_driver_params, 4) / 4;
-
-	if ((so->type == MESA_SHADER_VERTEX) &&
-			(compiler->gpu_id < 500) &&
-			so->shader->stream_output.num_outputs > 0) {
-		so->constbase.tfbo = constoff;
-		constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
-	}
-
-	so->constbase.immediate = constoff;
 
 	return ctx;
 }
@@ -237,7 +182,7 @@ ir3_get_src(struct ir3_context *ctx, nir_src *src)
 		for (unsigned i = 0; i < num_components; i++) {
 			unsigned n = src->reg.base_offset * reg->num_components + i;
 			compile_assert(ctx, n < arr->length);
-			value[i] = ir3_create_array_load(ctx, arr, n, addr);
+			value[i] = ir3_create_array_load(ctx, arr, n, addr, reg->bit_size);
 		}
 
 		return value;
@@ -541,20 +486,28 @@ ir3_get_array(struct ir3_context *ctx, nir_register *reg)
 /* relative (indirect) if address!=NULL */
 struct ir3_instruction *
 ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
-		struct ir3_instruction *address)
+		struct ir3_instruction *address, unsigned bitsize)
 {
 	struct ir3_block *block = ctx->block;
 	struct ir3_instruction *mov;
 	struct ir3_register *src;
+	unsigned flags = 0;
 
 	mov = ir3_instr_create(block, OPC_MOV);
-	mov->cat1.src_type = TYPE_U32;
-	mov->cat1.dst_type = TYPE_U32;
+	if (bitsize < 32) {
+		mov->cat1.src_type = TYPE_U16;
+		mov->cat1.dst_type = TYPE_U16;
+		flags |= IR3_REG_HALF;
+	} else {
+		mov->cat1.src_type = TYPE_U32;
+		mov->cat1.dst_type = TYPE_U32;
+	}
+
 	mov->barrier_class = IR3_BARRIER_ARRAY_R;
 	mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
-	ir3_reg_create(mov, 0, 0);
+	ir3_reg_create(mov, 0, flags);
 	src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
-			COND(address, IR3_REG_RELATIV));
+			COND(address, IR3_REG_RELATIV) | flags);
 	src->instr = arr->last_write;
 	src->size = arr->length;
 	src->array.id = arr->id;
@@ -577,8 +530,11 @@ ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
 
 	/* if not relative store, don't create an extra mov, since that
 	 * ends up being difficult for cp to remove.
+	 *
+	 * Also, don't skip the mov if the src is meta (like fanout/split),
+	 * since that creates a situation that RA can't really handle properly.
 	 */
-	if (!address) {
+	if (!address && !is_meta(src)) {
 		dst = src->regs[0];
 
 		src->barrier_class |= IR3_BARRIER_ARRAY_W;
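The second hunk above follows a common NIR idiom: run a lowering pass late, and only if it reports progress, run a small cleanup swing over the result. A minimal sketch of the idiom, assuming mesa's NIR headers and the ir3_nir_lower_imul pass declared in ir3_nir.h (the wrapper function name is illustrative, not part of the tree):

#include "compiler/nir/nir.h"
#include "ir3_nir.h"

static void
lower_imul_late(nir_shader *s)
{
	bool progress = false;

	/* Lower 32-bit nir_op_imul only now, so that multiplies introduced
	 * by earlier passes (e.g., array indexing from
	 * nir_lower_locals_to_regs) get caught as well.
	 */
	NIR_PASS(progress, s, ir3_nir_lower_imul);

	/* Pay for a cleanup swing only when something was actually lowered: */
	if (progress) {
		NIR_PASS_V(s, nir_opt_algebraic);
		NIR_PASS_V(s, nir_opt_dce);
		NIR_PASS_V(s, nir_opt_constant_folding);
	}
}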
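For the widened ir3_create_array_load() signature, callers forward the NIR register's bit_size so that sub-32-bit array elements come back as half registers, exactly as the ir3_get_src() hunk does. A hedged usage sketch (load_array_component is an illustrative wrapper; ir3_get_array() and the new signature come from the hunks above):

#include "ir3_context.h"

static struct ir3_instruction *
load_array_component(struct ir3_context *ctx, nir_register *reg,
		unsigned comp, struct ir3_instruction *addr)
{
	struct ir3_array *arr = ir3_get_array(ctx, reg);

	/* bit_size < 32 makes the mov use TYPE_U16 and tags both of its
	 * registers with IR3_REG_HALF, per ir3_create_array_load() above.
	 */
	return ir3_create_array_load(ctx, arr, comp, addr, reg->bit_size);
}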