From: Christoph Bumiller Date: Mon, 7 Feb 2011 17:59:46 +0000 (+0100) Subject: nvc0: try to fix register conflicts for vector instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=92d8af582d2584ed95bbb4c7965812f7bc47c9ff;p=mesa.git nvc0: try to fix register conflicts for vector instructions Vector here means using multiple 32 bit regs which are forced to be consecutive in the register file. This still isn't quite nice. --- diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h index 01ca95b0741..0756288daf7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc.h +++ b/src/gallium/drivers/nvc0/nvc0_pc.h @@ -53,7 +53,8 @@ /** * BIND forces source operand i into the same register as destination operand i, - * and the operands will be assigned consecutive registers (needed for TEX) + * and the operands will be assigned consecutive registers (needed for TEX). + * Beware conflicts ! * SELECT forces its multiple source operands and its destination operand into * one and the same register. */ diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c index 57bf4b77f3e..a6791529fa7 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c @@ -1147,13 +1147,15 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy * neighbouring registers. CSE might have messed this up. + * Just generate a MOV for each source to avoid conflicts if they're used in + * multiple NV_OP_BIND at different positions. */ static int nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) { struct nv_value *val; struct nv_instruction *bnd, *nvi, *next; - int s, t; + int s; for (bnd = b->entry; bnd; bnd = next) { next = bnd->next; @@ -1161,20 +1163,17 @@ nv_pass_fix_bind(struct nv_pass *ctx, struct nv_basic_block *b) continue; for (s = 0; s < 4 && bnd->src[s]; ++s) { val = bnd->src[s]->value; - for (t = s + 1; t < 4 && bnd->src[t]; ++t) { - if (bnd->src[t]->value != val) - continue; - nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); - nvi->def[0] = new_value_like(ctx->pc, val); - nvi->def[0]->insn = nvi; - nv_reference(ctx->pc, nvi, 0, val); - nvc0_insn_insert_before(bnd, nvi); - nv_reference(ctx->pc, bnd, t, nvi->def[0]); - } + nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV); + nvi->def[0] = new_value_like(ctx->pc, val); + nvi->def[0]->insn = nvi; + nv_reference(ctx->pc, nvi, 0, val); + nv_reference(ctx->pc, bnd, s, nvi->def[0]); + + nvc0_insn_insert_before(bnd, nvi); } } - DESCEND_ARBITRARY(t, nv_pass_fix_bind); + DESCEND_ARBITRARY(s, nv_pass_fix_bind); return 0; } diff --git a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c index d24f09a1507..ee282680061 100644 --- a/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c +++ b/src/gallium/drivers/nvc0/nvc0_pc_regalloc.c @@ -477,7 +477,7 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) break; case NV_OP_MOV: if ((iter == 2) && i->src[0]->value->insn && - !nv_is_texture_op(i->src[0]->value->join->insn->opcode)) + !nv_is_vector_op(i->src[0]->value->join->insn->opcode)) try_join_values(ctx, i->def[0], i->src[0]->value); break; case NV_OP_SELECT: @@ -488,18 +488,16 @@ pass_join_values(struct nv_pc_pass *ctx, int iter) do_join_values(ctx, i->def[0], i->src[c]->value); } break; - case NV_OP_TEX: - case NV_OP_TXB: - case NV_OP_TXL: - case NV_OP_TXQ: - /* on nvc0, TEX src and dst can differ */ - break; case NV_OP_BIND: if (iter) break; - for (c = 0; c < 6 && i->src[c]; ++c) + for (c = 0; c < 4 && i->src[c]; ++c) do_join_values(ctx, i->def[c], i->src[c]->value); break; + case NV_OP_TEX: + case NV_OP_TXB: + case NV_OP_TXL: + case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */ default: break; } @@ -730,6 +728,21 @@ nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set) set->pc = pc; } +/* We allocate registers for all defs of a vector instruction at once. + * Since we'll encounter all of them in the allocation loop, do the allocation + * when we're at the one with the live range that starts latest. + */ +static boolean +is_best_representative(struct nv_value *val) +{ + struct nv_instruction *nvi = val->insn; + int i; + for (i = 0; i < 4 && val->insn->def[i]; ++i) + if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn) + return FALSE; + return TRUE; +} + static void insert_ordered_tail(struct nv_value *list, struct nv_value *nval) { @@ -821,11 +834,13 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) boolean mem = FALSE; int v = nvi_vector_size(cur->insn); - if (v > 1) - mem = !reg_assign(&f, &cur->insn->def[0], v); - else + if (v > 1) { + if (is_best_representative(cur)) + mem = !reg_assign(&f, &cur->insn->def[0], v); + } else { if (iter) mem = !reg_assign(&f, &cur, 1); + } if (mem) { NOUVEAU_ERR("out of registers\n"); diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c index 9b5d4290787..f53af6c49c3 100644 --- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c +++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c @@ -1333,10 +1333,6 @@ emit_tex(struct bld_context *bld, uint opcode, int tic, int tsc, if (array) arg[dim] = bld_cvt(bld, NV_TYPE_U32, NV_TYPE_F32, arg[dim]); - /* ensure that all inputs reside in a GPR */ - for (c = 0; c < dim + array + cube + shadow; ++c) - (src[c] = bld_insn_1(bld, NV_OP_MOV, arg[c]))->insn->fixed = 1; - /* bind { layer x y z } and { lod/bias shadow } to adjacent regs */ bnd = new_instruction(bld->pc, NV_OP_BIND); @@ -1878,10 +1874,10 @@ bld_instruction(struct bld_context *bld, } for (c = 0; c < 4; ++c) - if ((mask & (1 << c)) && - ((dst0[c]->reg.file == NV_FILE_IMM) || - (dst0[c]->reg.id == 63 && dst0[c]->reg.file == NV_FILE_GPR))) - dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); + if (mask & (1 << c)) + if ((dst0[c]->reg.file == NV_FILE_IMM) || + (dst0[c]->reg.file == NV_FILE_GPR && dst0[c]->reg.id == 63)) + dst0[c] = bld_insn_1(bld, NV_OP_MOV, dst0[c]); c = 0; if ((mask & 0x3) == 0x3) {