X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv50%2Fnv50_pc_optimize.c;h=d72b23c137a74cd61b32ea0f5acd0266c89c07ee;hb=1eb957bb4108123bea95b818e0544e3b5f255e08;hp=80f3bb34b057cd91623bcc2ebc35266b4cb6ca87;hpb=3e27785f3ebe6620805f97cb5c17ec8bd28bc1e8;p=mesa.git diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 80f3bb34b05..d72b23c137a 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -80,6 +80,8 @@ inst_commutation_legal(struct nv_instruction *a, static INLINE boolean inst_cullable(struct nv_instruction *nvi) { + if (nvi->opcode == NV_OP_STA) + return FALSE; return (!(nvi->is_terminator || nvi->is_join || nvi->target || nvi->fixed || @@ -92,14 +94,17 @@ nvi_isnop(struct nv_instruction *nvi) if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF) return TRUE; - if (nvi->fixed || - nvi->is_terminator || - nvi->flags_src || + /* NOTE: 'fixed' now only means that it shouldn't be optimized away, + * but we can still remove it if it is a no-op move. + */ + if (/* nvi->fixed || */ + /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */ nvi->flags_def || + nvi->is_terminator || nvi->is_join) return FALSE; - if (nvi->def[0]->join->reg.id < 0) + if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) return TRUE; if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT) @@ -109,7 +114,7 @@ nvi_isnop(struct nv_instruction *nvi) return FALSE; if (nvi->src[0]->value->join->reg.id < 0) { - debug_printf("nvi_isnop: orphaned value detected\n"); + NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n"); return TRUE; } @@ -138,8 +143,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) int j; uint size, n32 = 0; + /* find first non-empty block emitted before b */ for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j); - if (j >= 0) { + for (; j >= 0; --j) { in = pc->bb_list[j]; /* check for no-op branches (BRA $PC+8) */ @@ -153,6 +159,9 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nv_nvi_delete(in->exit); } b->bin_pos = in->bin_pos + in->bin_size; + + if (in->bin_size) /* no more no-op branches to b */ + break; } pc->bb_list[pc->num_blocks++] = b; @@ -176,9 +185,6 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) nv50_inst_min_size(nvi->next) == 4 && inst_commutation_legal(nvi, nvi->next)) { ++n32; - debug_printf("permuting: "); - nv_print_instruction(nvi); - nv_print_instruction(nvi->next); nv_nvi_permute(nvi, nvi->next); next = nvi; } else { @@ -193,7 +199,7 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) } if (!b->entry) { - debug_printf("block %p is now empty\n", b); + NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b); } else if (!b->exit->is_long) { assert(n32); @@ -211,23 +217,34 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) pc->bin_size += b->bin_size *= 4; } -int -nv_pc_exec_pass2(struct nv_pc *pc) +static int +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pass pass; pass.pc = pc; pc->pass_seq++; - nv_pass_flatten(&pass, pc->root); - debug_printf("preparing %u blocks for emission\n", pc->num_blocks); + nv_pass_flatten(&pass, root); + + nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); - pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *)); - pc->num_blocks = 0; + return 0; +} + +int +nv_pc_exec_pass2(struct nv_pc *pc) +{ + int i, ret; + + NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks); - nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc); + pc->num_blocks = 0; /* will reorder bb_list */ + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) + return ret; return 0; } @@ -265,11 +282,8 @@ check_swap_src_0_1(struct nv_instruction *nvi) return; assert(src0 && src1); - if (src1->value->reg.file == NV_FILE_IMM) { - /* should only be present from folding a constant MUL part of a MAD */ - assert(nvi->opcode == NV_OP_ADD); + if (src1->value->reg.file == NV_FILE_IMM) return; - } if (is_cmem_load(src0->value->insn)) { if (!is_cmem_load(src1->value->insn)) { @@ -287,7 +301,7 @@ check_swap_src_0_1(struct nv_instruction *nvi) } if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0) - nvi->set_cond = cc_swapped[nvi->set_cond]; + nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7]; } static int @@ -306,21 +320,25 @@ nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b) continue; nvi = sti->src[0]->value->insn; - if (!nvi || nvi->opcode == NV_OP_PHI) + if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode)) continue; assert(nvi->def[0] == sti->src[0]->value); + if (nvi->opcode == NV_OP_SELECT) + continue; if (nvi->def[0]->refc > 1) continue; /* cannot write to $oX when using immediate */ for (j = 0; j < 4 && nvi->src[j]; ++j) - if (nvi->src[j]->value->reg.file == NV_FILE_IMM) + if (nvi->src[j]->value->reg.file == NV_FILE_IMM || + nvi->src[j]->value->reg.file == NV_FILE_MEM_L) break; if (j < 4 && nvi->src[j]) continue; nvi->def[0] = sti->def[0]; + nvi->def[0]->insn = nvi; nvi->fixed = sti->fixed; nv_nvi_delete(sti); @@ -359,10 +377,13 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) if (j == 0 && ld->src[4]) /* can't load shared mem */ continue; - /* fold it ! */ /* XXX: ref->insn */ + /* fold it ! */ nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value); if (ld->src[4]) nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value); + + if (!nv_nvi_refcount(ld)) + nv_nvi_delete(ld); } } DESCEND_ARBITRARY(j, nv_pass_fold_loads); @@ -370,6 +391,7 @@ nv_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } +/* NOTE: Assumes loads have not yet been folded. */ static int nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) { @@ -384,14 +406,7 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) nvi->src[1]->mod ^= NV_MOD_NEG; } - /* should not put any modifiers on NEG and ABS */ - assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod); - assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod); - - for (j = 0; j < 4; ++j) { - if (!nvi->src[j]) - break; - + for (j = 0; j < 4 && nvi->src[j]; ++j) { mi = nvi->src[j]->value->insn; if (!mi) continue; @@ -403,16 +418,32 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS; else continue; + assert(!(mod & mi->src[0]->mod & NV_MOD_NEG)); - if (nvi->opcode == NV_OP_ABS) + mod |= mi->src[0]->mod; + + if (mi->flags_def || mi->flags_src) + continue; + + if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) { + /* abs neg [abs] = abs */ mod &= ~(NV_MOD_NEG | NV_MOD_ABS); - else - if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) { - nvi->opcode = NV_OP_MOV; + } else + if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) { + /* neg as opcode and modifier on same insn cannot occur */ + /* neg neg abs = abs, neg neg = identity */ + assert(j == 0); + if (mod & NV_MOD_ABS) + nvi->opcode = NV_OP_ABS; + else + if (nvi->flags_def) + nvi->opcode = NV_OP_CVT; + else + nvi->opcode = NV_OP_MOV; mod = 0; } - if (!(nv50_supported_src_mods(nvi->opcode, j) & mod)) + if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod) continue; nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value); @@ -423,11 +454,15 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) if (nvi->opcode == NV_OP_SAT) { mi = nvi->src[0]->value->insn; - if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) { - mi->saturate = 1; - mi->def[0] = nvi->def[0]; - nv_nvi_delete(nvi); - } + if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD) + continue; + if (mi->flags_def || mi->def[0]->refc > 1) + continue; + + mi->saturate = 1; + mi->def[0] = nvi->def[0]; + mi->def[0]->insn = mi; + nv_nvi_delete(nvi); } } DESCEND_ARBITRARY(j, nv_pass_lower_mods); @@ -437,22 +472,6 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b) #define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL) -static struct nv_value * -find_immediate(struct nv_ref *ref) -{ - struct nv_value *src; - - if (!ref) - return NULL; - - src = ref->value; - while (src->insn && src->insn->opcode == NV_OP_MOV) { - assert(!src->insn->src[0]->mod); - src = src->insn->src[0]->value; - } - return (src->reg.file == NV_FILE_IMM) ? src : NULL; -} - static void modifiers_apply(uint32_t *val, ubyte type, ubyte mod) { @@ -505,7 +524,7 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, u1.u32 = src1->reg.imm.u32; modifiers_apply(&u0.u32, type, nvi->src[0]->mod); - modifiers_apply(&u0.u32, type, nvi->src[1]->mod); + modifiers_apply(&u1.u32, type, nvi->src[1]->mod); switch (nvi->opcode) { case NV_OP_MAD: @@ -534,9 +553,9 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, break; case NV_OP_SUB: switch (type) { - case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; - case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; - case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; + case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break; + case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break; + case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break; default: assert(0); break; @@ -560,6 +579,11 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi, nvi->src[0] = nvi->src[2]; nvi->src[2] = NULL; nvi->opcode = NV_OP_ADD; + + if (val->reg.imm.u32 == 0) { + nvi->src[1] = NULL; + nvi->opcode = NV_OP_MOV; + } } } @@ -650,6 +674,15 @@ constant_operand(struct nv_pc *pc, default: break; } + + if (nvi->opcode == NV_OP_MOV && nvi->flags_def) { + struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT); + + nv_reference(pc, &cvt->src[0], nvi->def[0]); + + cvt->flags_def = nvi->flags_def; + nvi->flags_def = NULL; + } } static int @@ -664,8 +697,8 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) next = nvi->next; - src0 = find_immediate(nvi->src[0]); - src1 = find_immediate(nvi->src[1]); + src0 = nvcg_find_immediate(nvi->src[0]); + src1 = nvcg_find_immediate(nvi->src[1]); if (src0 && src1) constant_expression(ctx->pc, nvi, src0, src1); @@ -692,6 +725,10 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) else continue; + /* could have an immediate from above constant_* */ + if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR) + continue; + nvi->opcode = NV_OP_MAD; mod = nvi->src[(src == src0) ? 0 : 1]->mod; nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL); @@ -708,26 +745,11 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } -/* -set $r2 g f32 $r2 $r3 -cvt abs rn f32 $r2 s32 $r2 -cvt f32 $c0 # f32 $r2 -e $c0 bra 0x80 -*/ -#if 0 -static int -nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b) -{ - /* XXX: easier in IR builder for now */ - return 0; -} -#endif - /* TODO: redundant store elimination */ struct load_record { struct load_record *next; - uint64_t data; + uint64_t data[2]; struct nv_value *value; }; @@ -746,12 +768,13 @@ struct nv_pass_reld_elim { int alloc; }; +/* TODO: properly handle loads from l[] memory in the presence of stores */ static int nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) { struct load_record **rec, *it; struct nv_instruction *ld, *next; - uint64_t data; + uint64_t data[2]; struct nv_value *val; int j; @@ -763,11 +786,13 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) rec = NULL; if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) { - data = val->reg.id; + data[0] = val->reg.id; + data[1] = 0; rec = &ctx->mem_v; } else if (ld->opcode == NV_OP_LDA) { - data = val->reg.id; + data[0] = val->reg.id; + data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL; if (val->reg.file >= NV_FILE_MEM_C(0) && val->reg.file <= NV_FILE_MEM_C(15)) rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)]; @@ -779,7 +804,8 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) rec = &ctx->mem_l; } else if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) { - data = val->reg.imm.u32; + data[0] = val->reg.imm.u32; + data[1] = 0; rec = &ctx->imm; } @@ -787,20 +813,22 @@ nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b) continue; for (it = *rec; it; it = it->next) - if (it->data == data) + if (it->data[0] == data[0] && it->data[1] == data[1]) break; if (it) { if (ld->def[0]->reg.id >= 0) it->value = ld->def[0]; else + if (!ld->fixed) nvcg_replace_value(ctx->pc, ld->def[0], it->value); } else { if (ctx->alloc == LOAD_RECORD_POOL_SIZE) continue; it = &ctx->pool[ctx->alloc++]; it->next = *rec; - it->data = data; + it->data[0] = data[0]; + it->data[1] = data[1]; it->value = ld->def[0]; *rec = it; } @@ -936,7 +964,8 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) if (bb_is_if_else_endif(b)) { - debug_printf("pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); + NV50_DBGMSG(PROG_IR, + "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id); for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0) if (!nv50_nvi_can_predicate(nvi)) @@ -945,11 +974,13 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1) if (!nv50_nvi_can_predicate(nvi)) break; +#if NV50_DEBUG & NV50_DEBUG_PROG_IR if (nvi) { debug_printf("cannot predicate: "); nv_print_instruction(nvi); } } else { debug_printf("cannot predicate: "); nv_print_instruction(nvi); +#endif } if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */ @@ -965,7 +996,9 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b) if (b->exit && b->exit->opcode == NV_OP_JOINAT) nv_nvi_delete(b->exit); - if ((nvi = b->out[0]->out[0]->entry)) { + i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; + + if ((nvi = b->out[0]->out[i]->entry)) { nvi->is_join = 0; if (nvi->opcode == NV_OP_JOIN) nv_nvi_delete(nvi); @@ -991,10 +1024,11 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) for (ir = entry; ir; ir = next) { next = ir->next; for (ik = entry; ik != ir; ik = ik->next) { - if (ir->opcode != ik->opcode) + if (ir->opcode != ik->opcode || ir->fixed) continue; - if (ik->opcode == NV_OP_LDA || + if (!ir->def[0] || !ik->def[0] || + ik->opcode == NV_OP_LDA || ik->opcode == NV_OP_STA || ik->opcode == NV_OP_MOV || nv_is_vector_op(ik->opcode)) @@ -1007,8 +1041,6 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) ik->flags_def || ir->flags_def) continue; /* and also not with flags, for now */ - assert(ik->def[0] && ir->def[0]); - if (ik->def[0]->reg.file == NV_FILE_OUT || ir->def[0]->reg.file == NV_FILE_OUT || !values_equal(ik->def[0], ir->def[0])) @@ -1048,8 +1080,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } -int -nv_pc_exec_pass0(struct nv_pc *pc) +static int +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pass_reld_elim *reldelim; struct nv_pass pass; @@ -1063,35 +1095,37 @@ nv_pc_exec_pass0(struct nv_pc *pc) * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, pc->root); + ret = nv_pass_lower_arith(&pass, root); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_fold_loads(&pass, pc->root); + ret = nv_pass_lower_mods(&pass, root); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_fold_stores(&pass, pc->root); + ret = nv_pass_fold_loads(&pass, root); if (ret) return ret; - reldelim = CALLOC_STRUCT(nv_pass_reld_elim); - reldelim->pc = pc; pc->pass_seq++; - ret = nv_pass_reload_elim(reldelim, pc->root); - FREE(reldelim); + ret = nv_pass_fold_stores(&pass, root); if (ret) return ret; - pc->pass_seq++; - ret = nv_pass_cse(&pass, pc->root); - if (ret) - return ret; + if (pc->opt_reload_elim) { + reldelim = CALLOC_STRUCT(nv_pass_reld_elim); + reldelim->pc = pc; + pc->pass_seq++; + ret = nv_pass_reload_elim(reldelim, root); + FREE(reldelim); + if (ret) + return ret; + } pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, pc->root); + ret = nv_pass_cse(&pass, root); if (ret) return ret; @@ -1099,14 +1133,25 @@ nv_pc_exec_pass0(struct nv_pc *pc) do { dce.removed = 0; pc->pass_seq++; - ret = nv_pass_dce(&dce, pc->root); + ret = nv_pass_dce(&dce, root); if (ret) return ret; } while (dce.removed); - ret = nv_pass_tex_mask(&pass, pc->root); + ret = nv_pass_tex_mask(&pass, root); if (ret) return ret; return ret; } + +int +nv_pc_exec_pass0(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) + return ret; + return 0; +}