X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fir3_ra.c;h=441a6eb7ce964e7cff733a4de4081f581827d286;hb=836d41d77265a2d2ca42bdbfd25de07b9bb134c9;hp=cc05d0cac2420662c51cff853008cb7cf531891c;hpb=65f604e3b3b25bb95c96062675817a3828562e26;p=mesa.git

diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index cc05d0cac24..441a6eb7ce9 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -30,7 +30,7 @@
 #include "util/bitset.h"
 
 #include "ir3.h"
-#include "ir3_compiler.h"
+#include "ir3_shader.h"
 #include "ir3_ra.h"
 
 
@@ -563,13 +563,16 @@ ra_init(struct ir3_ra_ctx *ctx)
 	ctx->hr0_xyz_nodes = ctx->alloc_count;
 	ctx->alloc_count += 3;
 
+	/* Add vreg name for prefetch-exclusion range: */
+	ctx->prefetch_exclude_node = ctx->alloc_count++;
+
 	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
 	ralloc_steal(ctx->g, ctx->instrd);
 	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 	ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 
 	/* TODO add selector callback for split (pre-a6xx) register file: */
-	if (ctx->ir->compiler->gpu_id >= 600) {
+	if (ctx->v->mergedregs) {
 		ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx);
 
 		if (ctx->scalar_pass) {
@@ -711,11 +714,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 			 */
 			if (is_tex_or_prefetch(instr)) {
 				int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
-				int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+				int r0_xyz = is_half(instr) ?
 					ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
 				for (int i = 0; i < writemask_skipped_regs; i++)
 					ra_add_node_interference(ctx->g, name, r0_xyz + i);
 			}
+
+			/* Pre-fetched textures have a lower limit for bits to encode dst
+			 * register, so add additional interference with registers above
+			 * that limit.
+			 */
+			if (instr->opc == OPC_META_TEX_PREFETCH) {
+				ra_add_node_interference(ctx->g, name,
+						ctx->prefetch_exclude_node);
+			}
 		}
 
 		foreach_use (name, ctx, instr) {
@@ -1011,7 +1023,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 		arr->end_ip = 0;
 	}
 
-	/* set up the r0.xyz precolor regs.
 	 */
 	for (int i = 0; i < 3; i++) {
 		ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
@@ -1019,6 +1030,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 				ctx->set->first_half_reg + i);
 	}
 
+	/* pre-color node that conflict with half/full regs higher than what
+	 * can be encoded for tex-prefetch:
+	 */
+	ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
+			ctx->set->prefetch_exclude_reg);
+
 	/* compute live ranges (use/def) on a block level, also updating
 	 * block's def/use bitmasks (used below to calculate per-block
 	 * livein/liveout):
@@ -1105,60 +1122,6 @@
 	}
 }
 
-/* some instructions need fix-up if dst register is half precision: */
-static void fixup_half_instr_dst(struct ir3_instruction *instr)
-{
-	switch (opc_cat(instr->opc)) {
-	case 1: /* move instructions */
-		instr->cat1.dst_type = half_type(instr->cat1.dst_type);
-		break;
-	case 4:
-		switch (instr->opc) {
-		case OPC_RSQ:
-			instr->opc = OPC_HRSQ;
-			break;
-		case OPC_LOG2:
-			instr->opc = OPC_HLOG2;
-			break;
-		case OPC_EXP2:
-			instr->opc = OPC_HEXP2;
-			break;
-		default:
-			break;
-		}
-		break;
-	case 5:
-		instr->cat5.type = half_type(instr->cat5.type);
-		break;
-	}
-}
-/* some instructions need fix-up if src register is half precision: */
-static void fixup_half_instr_src(struct ir3_instruction *instr)
-{
-	switch (instr->opc) {
-	case OPC_MOV:
-		instr->cat1.src_type = half_type(instr->cat1.src_type);
-		break;
-	case OPC_MAD_F32:
-		instr->opc = OPC_MAD_F16;
-		break;
-	case OPC_SEL_B32:
-		instr->opc = OPC_SEL_B16;
-		break;
-	case OPC_SEL_S32:
-		instr->opc = OPC_SEL_S16;
-		break;
-	case OPC_SEL_F32:
-		instr->opc = OPC_SEL_F16;
-		break;
-	case OPC_SAD_S32:
-		instr->opc = OPC_SAD_S16;
-		break;
-	default:
-		break;
-	}
-}
-
 /* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
  * array access(es) which do not have any previous access to depend
  * on from scheduling point of view
@@ -1241,8 +1204,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if (writes_gpr(instr)) {
 			if (should_assign(ctx, instr)) {
 				reg_assign(ctx, instr->regs[0], instr);
-				if (instr->regs[0]->flags & IR3_REG_HALF)
-					fixup_half_instr_dst(instr);
 			}
 		}
 
@@ -1258,9 +1219,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 			/* Note: reg->instr could be null for IR3_REG_ARRAY */
 			if (src || (reg->flags & IR3_REG_ARRAY))
 				reg_assign(ctx, instr->regs[n+1], src);
-
-			if (instr->regs[n+1]->flags & IR3_REG_HALF)
-				fixup_half_instr_src(instr);
 		}
 	}
 
@@ -1269,8 +1227,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 	 * them in the first pass:
 	 */
 	if (!ctx->scalar_pass) {
-		struct ir3_instruction *in, *out;
-
 		foreach_input (in, ctx->ir) {
 			reg_assign(ctx, in->regs[0], in);
 		}
@@ -1354,13 +1310,6 @@ ra_precolor(struct ir3_ra_ctx *ctx, struct ir3_instruction **precolor, unsigned
 
 			debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
 
-			/* only consider the first component: */
-			if (id->off > 0)
-				continue;
-
-			if (ctx->scalar_pass && !should_assign(ctx, instr))
-				continue;
-
 			/* 'base' is in scalar (class 0) but we need to map that
 			 * the conflicting register of the appropriate class (ie.
 			 * input could be vec2/vec3/etc)
@@ -1379,6 +1328,9 @@
 			 * .. and so on..
 			 */
 			unsigned regid = instr->regs[0]->num;
+			assert(regid >= id->off);
+			regid -= id->off;
+
 			unsigned reg = ctx->set->gpr_to_ra_reg[id->cls][regid];
 			unsigned name = ra_name(ctx, id);
 			ra_set_node_reg(ctx->g, name, reg);
@@ -1532,7 +1484,8 @@ ir3_ra_pass(struct ir3_shader_variant *v, struct ir3_instruction **precolor,
 	struct ir3_ra_ctx ctx = {
 			.v = v,
 			.ir = v->ir,
-			.set = v->ir->compiler->set,
+			.set = v->mergedregs ?
+				v->ir->compiler->mergedregs_set : v->ir->compiler->set,
 			.scalar_pass = scalar_pass,
 	};
 	int ret;
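
The least obvious piece of this diff is the tex-prefetch handling, so a short restatement: pre-fetched texture instructions have fewer bits to encode their destination register, so the allocator has to keep those destinations below a fixed register number. Instead of special-casing register selection, the change reserves one extra virtual name (prefetch_exclude_node), pre-colors it to ctx->set->prefetch_exclude_reg (a register whose conflict set covers everything above the encodable limit), and adds interference between that node and the destination of every OPC_META_TEX_PREFETCH. The sketch below is a minimal, hypothetical illustration of that pattern using the two util/register_allocate calls that appear in the diff; the helper name and its parameters are invented for illustration, and the real ir3 code spreads this work across ra_init(), ra_block_compute_live_ranges() and ra_add_interference().

/* Minimal sketch of the exclusion-node pattern, assuming Mesa's
 * util/register_allocate API (ra_set_node_reg / ra_add_node_interference,
 * both used in the diff above).  The helper and its parameters are
 * hypothetical, for illustration only.
 */
#include "util/register_allocate.h"

static void
constrain_prefetch_dsts(struct ra_graph *g,
		unsigned prefetch_exclude_node,   /* spare vreg name reserved up front */
		unsigned prefetch_exclude_reg,    /* reg that conflicts with every reg above
						   * the encodable tex-prefetch dst limit */
		const unsigned *prefetch_dst_names,
		unsigned num_prefetch_dsts)
{
	/* Pin the spare node to the conflict register; nothing that
	 * interferes with it can be assigned a register in the
	 * excluded (too-high) range.
	 */
	ra_set_node_reg(g, prefetch_exclude_node, prefetch_exclude_reg);

	/* Force every tex-prefetch destination to avoid that range: */
	for (unsigned i = 0; i < num_prefetch_dsts; i++)
		ra_add_node_interference(g, prefetch_dst_names[i],
				prefetch_exclude_node);
}

The other half of the trick, building prefetch_exclude_reg itself so that it conflicts with all registers above the limit, presumably happens where the ir3 RA register set is constructed; that part is not visible in this file's diff.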