X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv50%2Fnv50_pc_optimize.c;h=d72b23c137a74cd61b32ea0f5acd0266c89c07ee;hb=1eb957bb4108123bea95b818e0544e3b5f255e08;hp=ea1da6268da99a4e9b0180a68c2e31d32c32caa4;hpb=98c87c382d080ff5a048564e942e649fbaf43879;p=mesa.git diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index ea1da6268da..d72b23c137a 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -20,8 +20,6 @@ * SOFTWARE. */ -/* #define NV50PC_DEBUG */ - #include "nv50_pc.h" #define DESCEND_ARBITRARY(j, f) \ @@ -116,7 +114,7 @@ nvi_isnop(struct nv_instruction *nvi) return FALSE; if (nvi->src[0]->value->join->reg.id < 0) { - NV50_DBGMSG("nvi_isnop: orphaned value detected\n"); + NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n"); return TRUE; } @@ -145,8 +143,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) int j; uint size, n32 = 0; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -160,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nv_nvi_delete(in->exit); } b->bin_pos = in->bin_pos + in->bin_size; + + if (in->bin_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; @@ -197,7 +199,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) } if (!b->entry) { - NV50_DBGMSG("block %p is now empty\n", b); + NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b); } else if (!b->exit->is_long) { assert(n32); @@ -236,7 +238,7 @@ nv_pc_exec_pass2(struct nv_pc *pc) { int i, ret; - NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks); + NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); pc->num_blocks = 0; /* will reorder bb_list */ @@ -299,7 +301,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int @@ -336,6 +338,7 @@ nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b) continue; nvi->def[0] = sti->def[0]; + nvi->def[0]->insn = nvi; nvi->fixed = sti->fixed; nv_nvi_delete(sti); @@ -374,7 +377,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) if (j == 0 && ld->src[4]) /* can't load shared mem */ continue; - /* fold it ! */ /* XXX: ref->insn */ + /* fold it ! */ nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); if (ld->src[4]) nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value); @@ -388,6 +391,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } +/* NOTE: Assumes loads have not yet been folded. */ static int nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) { @@ -402,14 +406,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) nvi->src[1]->mod ^= NV_MOD_NEG; } - /* should not put any modifiers on NEG and ABS */ - assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod); - assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod); - - for (j = 0; j < 4; ++j) { - if (!nvi->src[j]) - break; - + for (j = 0; j < 4 && nvi->src[j]; ++j) { mi = nvi->src[j]->value->insn; if (!mi) continue; @@ -421,16 +418,32 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS; else continue; + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); - if (nvi->opcode == NV_OP_ABS) + mod |= mi->src[0]->mod; + + if (mi->flags_def || mi->flags_src) + continue; + + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { + /* abs neg [abs] = abs */ mod &= ~(NV_MOD_NEG | NV_MOD_ABS); - else - if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) { - nvi->opcode = NV_OP_MOV; + } else + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { + /* neg as opcode and modifier on same insn cannot occur */ + /* neg neg abs = abs, neg neg = identity */ + assert(j == 0); + if (mod & NV_MOD_ABS) + nvi->opcode = NV_OP_ABS; + else + if (nvi->flags_def) + nvi->opcode = NV_OP_CVT; + else + nvi->opcode = NV_OP_MOV; mod = 0; } - if (!(nv50_supported_src_mods(nvi->opcode, j) & mod)) + if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod) continue; nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value); @@ -441,11 +454,15 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) { - mi->saturate = 1; - mi->def[0] = nvi->def[0]; - nv_nvi_delete(nvi); - } + if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) + continue; + if (mi->flags_def || mi->def[0]->refc > 1) + continue; + + mi->saturate = 1; + mi->def[0] = nvi->def[0]; + mi->def[0]->insn = mi; + nv_nvi_delete(nvi); } } DESCEND_ARBITRARY(j, nv_pass_lower_mods); @@ -562,6 +579,11 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, nvi->src[0] = nvi->src[2]; nvi->src[2] = NULL; nvi->opcode = NV_OP_ADD; + + if (val->reg.imm.u32 == 0) { + nvi->src[1] = NULL; + nvi->opcode = NV_OP_MOV; + } } } @@ -703,6 +725,10 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) else continue; + /* could have an immediate from above constant_* */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + continue; + nvi->opcode = NV_OP_MAD; mod = nvi->src[(src == src0) ? 0 : 1]->mod; nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); @@ -723,7 +749,7 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) struct load_record { struct load_record *next; - uint64_t data; + uint64_t data[2]; struct nv_value *value; }; @@ -748,7 +774,7 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) { struct load_record **rec, *it; struct nv_instruction *ld, *next; - uint64_t data; + uint64_t data[2]; struct nv_value *val; int j; @@ -760,11 +786,13 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) rec = NULL; if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { - data = val->reg.id; + data[0] = val->reg.id; + data[1] = 0; rec = &ctx->mem_v; } else if (ld->opcode == NV_OP_LDA) { - data = val->reg.id; + data[0] = val->reg.id; + data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL; if (val->reg.file >= NV_FILE_MEM_C(0) && val->reg.file <= NV_FILE_MEM_C(15)) rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; @@ -776,7 +804,8 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) rec = &ctx->mem_l; } else if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { - data = val->reg.imm.u32; + data[0] = val->reg.imm.u32; + data[1] = 0; rec = &ctx->imm; } @@ -784,7 +813,7 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) continue; for (it = *rec; it; it = it->next) - if (it->data == data) + if (it->data[0] == data[0] && it->data[1] == data[1]) break; if (it) { @@ -798,7 +827,8 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) continue; it = &ctx->pool[ctx->alloc++]; it->next = *rec; - it->data = data; + it->data[0] = data[0]; + it->data[1] = data[1]; it->value = ld->def[0]; *rec = it; } @@ -934,7 +964,8 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) if (bb_is_if_else_endif(b)) { - NV50_DBGMSG("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); + NV50_DBGMSG(PROG_IR, + "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) if (!nv50_nvi_can_predicate(nvi)) @@ -943,7 +974,7 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) if (!nv50_nvi_can_predicate(nvi)) break; -#ifdef NV50_PC_DEBUG +#if NV50_DEBUG & NV50_DEBUG_PROG_IR if (nvi) { debug_printf("cannot predicate: "); nv_print_instruction(nvi); } @@ -1068,6 +1099,11 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) if (ret) return ret; + pc->pass_seq++; + ret = nv_pass_lower_mods(&pass, root); + if (ret) + return ret; + pc->pass_seq++; ret = nv_pass_fold_loads(&pass, root); if (ret) @@ -1093,11 +1129,6 @@ nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) if (ret) return ret; - pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, root); - if (ret) - return ret; - dce.pc = pc; do { dce.removed = 0;