X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fir3%2Fir3_ra.c;h=441a6eb7ce964e7cff733a4de4081f581827d286;hb=836d41d77265a2d2ca42bdbfd25de07b9bb134c9;hp=cc05d0cac2420662c51cff853008cb7cf531891c;hpb=65f604e3b3b25bb95c96062675817a3828562e26;p=mesa.git

diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index cc05d0cac24..441a6eb7ce9 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -30,7 +30,7 @@
 #include "util/bitset.h"
 
 #include "ir3.h"
-#include "ir3_compiler.h"
+#include "ir3_shader.h"
 #include "ir3_ra.h"
 
 
@@ -563,13 +563,16 @@ ra_init(struct ir3_ra_ctx *ctx)
 	ctx->hr0_xyz_nodes = ctx->alloc_count;
 	ctx->alloc_count += 3;
 
+	/* Add vreg name for prefetch-exclusion range: */
+	ctx->prefetch_exclude_node = ctx->alloc_count++;
+
 	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
 	ralloc_steal(ctx->g, ctx->instrd);
 	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 	ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 
 	/* TODO add selector callback for split (pre-a6xx) register file: */
-	if (ctx->ir->compiler->gpu_id >= 600) {
+	if (ctx->v->mergedregs) {
 		ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx);
 
 		if (ctx->scalar_pass) {
@@ -711,11 +714,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 			 */
 			if (is_tex_or_prefetch(instr)) {
 				int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
-				int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+				int r0_xyz = is_half(instr) ?
 					ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
 				for (int i = 0; i < writemask_skipped_regs; i++)
 					ra_add_node_interference(ctx->g, name, r0_xyz + i);
 			}
+
+			/* Pre-fetched textures have a lower limit for bits to encode dst
+			 * register, so add additional interference with registers above
+			 * that limit.
+			 */
+			if (instr->opc == OPC_META_TEX_PREFETCH) {
+				ra_add_node_interference(ctx->g, name,
+						ctx->prefetch_exclude_node);
+			}
 		}
 
 		foreach_use (name, ctx, instr) {
@@ -1011,7 +1023,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 		arr->end_ip = 0;
 	}
 
-	/* set up the r0.xyz precolor regs.
 	 */
 	for (int i = 0; i < 3; i++) {
 		ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
@@ -1019,6 +1030,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 				ctx->set->first_half_reg + i);
 	}
 
+	/* pre-color node that conflict with half/full regs higher than what
+	 * can be encoded for tex-prefetch:
+	 */
+	ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
+			ctx->set->prefetch_exclude_reg);
+
 	/* compute live ranges (use/def) on a block level, also updating
 	 * block's def/use bitmasks (used below to calculate per-block
 	 * livein/liveout):
@@ -1105,60 +1122,6 @@
 	}
 }
 
-/* some instructions need fix-up if dst register is half precision: */
-static void fixup_half_instr_dst(struct ir3_instruction *instr)
-{
-	switch (opc_cat(instr->opc)) {
-	case 1: /* move instructions */
-		instr->cat1.dst_type = half_type(instr->cat1.dst_type);
-		break;
-	case 4:
-		switch (instr->opc) {
-		case OPC_RSQ:
-			instr->opc = OPC_HRSQ;
-			break;
-		case OPC_LOG2:
-			instr->opc = OPC_HLOG2;
-			break;
-		case OPC_EXP2:
-			instr->opc = OPC_HEXP2;
-			break;
-		default:
-			break;
-		}
-		break;
-	case 5:
-		instr->cat5.type = half_type(instr->cat5.type);
-		break;
-	}
-}
-/* some instructions need fix-up if src register is half precision: */
-static void fixup_half_instr_src(struct ir3_instruction *instr)
-{
-	switch (instr->opc) {
-	case OPC_MOV:
-		instr->cat1.src_type = half_type(instr->cat1.src_type);
-		break;
-	case OPC_MAD_F32:
-		instr->opc = OPC_MAD_F16;
-		break;
-	case OPC_SEL_B32:
-		instr->opc = OPC_SEL_B16;
-		break;
-	case OPC_SEL_S32:
-		instr->opc = OPC_SEL_S16;
-		break;
-	case OPC_SEL_F32:
-		instr->opc = OPC_SEL_F16;
-		break;
-	case OPC_SAD_S32:
-		instr->opc = OPC_SAD_S16;
-		break;
-	default:
-		break;
-	}
-}
-
 /* NOTE: instr could be NULL for IR3_REG_ARRAY case, for the first
  * array access(es) which do not have any previous access to depend
  * on from scheduling point of view
@@ -1241,8 +1204,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if (writes_gpr(instr)) {
 			if (should_assign(ctx, instr)) {
 				reg_assign(ctx, instr->regs[0], instr);
-				if (instr->regs[0]->flags & IR3_REG_HALF)
-					fixup_half_instr_dst(instr);
 			}
 		}
 
@@ -1258,9 +1219,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 			/* Note: reg->instr could be null for IR3_REG_ARRAY */
 			if (src || (reg->flags & IR3_REG_ARRAY))
 				reg_assign(ctx, instr->regs[n+1], src);
-
-			if (instr->regs[n+1]->flags & IR3_REG_HALF)
-				fixup_half_instr_src(instr);
 		}
 	}
 
@@ -1269,8 +1227,6 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 	 * them in the first pass:
 	 */
 	if (!ctx->scalar_pass) {
-		struct ir3_instruction *in, *out;
-
 		foreach_input (in, ctx->ir) {
 			reg_assign(ctx, in->regs[0], in);
 		}
@@ -1354,13 +1310,6 @@ ra_precolor(struct ir3_ra_ctx *ctx, struct ir3_instruction **precolor, unsigned
 
 			debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
 
-			/* only consider the first component: */
-			if (id->off > 0)
-				continue;
-
-			if (ctx->scalar_pass && !should_assign(ctx, instr))
-				continue;
-
 			/* 'base' is in scalar (class 0) but we need to map that
 			 * the conflicting register of the appropriate class (ie.
 			 * input could be vec2/vec3/etc)
@@ -1379,6 +1328,9 @@
 			 * .. and so on..
 			 */
 			unsigned regid = instr->regs[0]->num;
+			assert(regid >= id->off);
+			regid -= id->off;
+
 			unsigned reg = ctx->set->gpr_to_ra_reg[id->cls][regid];
 			unsigned name = ra_name(ctx, id);
 			ra_set_node_reg(ctx->g, name, reg);
@@ -1532,7 +1484,8 @@ ir3_ra_pass(struct ir3_shader_variant *v, struct ir3_instruction **precolor,
 	struct ir3_ra_ctx ctx = {
 			.v = v,
 			.ir = v->ir,
-			.set = v->ir->compiler->set,
+			.set = v->mergedregs ?
+				v->ir->compiler->mergedregs_set : v->ir->compiler->set,
 			.scalar_pass = scalar_pass,
 	};
 	int ret;
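
The least obvious piece of this diff is the tex-prefetch handling, so a short restatement: pre-fetched texture instructions have fewer bits to encode their destination register, so the allocator has to keep those destinations below a fixed register number. Instead of special-casing register selection, the change reserves one extra virtual name (prefetch_exclude_node), pre-colors it to ctx->set->prefetch_exclude_reg (a register whose conflict set covers everything above the encodable limit), and adds interference between that node and the destination of every OPC_META_TEX_PREFETCH. The sketch below is a minimal, hypothetical illustration of that pattern using the two util/register_allocate calls that appear in the diff; the helper name and its parameters are invented for illustration, and the real ir3 code spreads this work across ra_init(), ra_block_compute_live_ranges() and ra_add_interference().

/* Minimal sketch of the exclusion-node pattern, assuming Mesa's
 * util/register_allocate API (ra_set_node_reg / ra_add_node_interference,
 * both used in the diff above).  The helper and its parameters are
 * hypothetical, for illustration only.
 */
#include "util/register_allocate.h"

static void
constrain_prefetch_dsts(struct ra_graph *g,
		unsigned prefetch_exclude_node,   /* spare vreg name reserved up front */
		unsigned prefetch_exclude_reg,    /* reg that conflicts with every reg above
						   * the encodable tex-prefetch dst limit */
		const unsigned *prefetch_dst_names,
		unsigned num_prefetch_dsts)
{
	/* Pin the spare node to the conflict register; nothing that
	 * interferes with it can be assigned a register in the
	 * excluded (too-high) range.
	 */
	ra_set_node_reg(g, prefetch_exclude_node, prefetch_exclude_reg);

	/* Force every tex-prefetch destination to avoid that range: */
	for (unsigned i = 0; i < num_prefetch_dsts; i++)
		ra_add_node_interference(g, prefetch_dst_names[i],
				prefetch_exclude_node);
}

The other half of the trick, building prefetch_exclude_reg itself so that it conflicts with all registers above the limit, presumably happens where the ir3 RA register set is constructed; that part is not visible in this file's diff.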