static INLINE boolean
inst_cullable(struct nv_instruction *nvi)
{
- return (!(nvi->is_terminator ||
+ if (nvi->opcode == NV_OP_STA)
+ return FALSE;
+ return (!(nvi->is_terminator || nvi->is_join ||
nvi->target ||
nvi->fixed ||
nv_nvi_refcount(nvi)));
static INLINE boolean
nvi_isnop(struct nv_instruction *nvi)
{
- if (nvi->opcode == NV_OP_EXPORT)
+ if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF) /* EXPORT and UNDEF are always reported as no-ops */
return TRUE;
- if (nvi->fixed ||
+ /* NOTE: 'fixed' now only means that it shouldn't be optimized away,
+ * but we can still remove it if it is a no-op move.
+ */
+ if (/* nvi->fixed || */
+ /* nvi->flags_src || */ /* cond. MOV to same register is still NOP */
+ nvi->flags_def || /* an instruction that has a flags definition is never a no-op */
nvi->is_terminator ||
- nvi->flags_src ||
- nvi->flags_def)
+ nvi->is_join) /* neither are terminators and join instructions */
return FALSE;
- if (nvi->def[0]->join->reg.id < 0)
+ if (nvi->def[0] && nvi->def[0]->join->reg.id < 0) /* result's join value has a negative register id
+ * (presumably: no register was assigned - confirm).
+ * NOTE(review): def[0] is NULL-checked only here; the final
+ * values_equal() call still receives it unchecked. */
return TRUE;
if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
return FALSE;
if (nvi->src[0]->value->join->reg.id < 0) {
- debug_printf("nvi_isnop: orphaned value detected\n");
+ NV50_DBGMSG(PROG_IR, "nvi_isnop: orphaned value detected\n"); /* source's join value has a negative register id */
return TRUE;
}
return values_equal(nvi->def[0], nvi->src[0]->value);
}
+struct nv_pass { /* context argument taken by the nv_pass_* per-block functions */
+ struct nv_pc *pc; /* program context; the passes hand it to nv_reference() and friends */
+ int n; /* counter; set to 0 by nv_pc_pass0, not read by any code visible here */
+ void *priv; /* not used by any code visible here - presumably pass-private data */
+};
+
+static int
+nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
+
static void
-nv_pc_pass_pre_emission(struct nv_pc *pc, struct nv_basic_block *b)
+nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
+ struct nv_pc *pc = (struct nv_pc *)priv;
struct nv_basic_block *in;
struct nv_instruction *nvi, *next;
int j;
uint size, n32 = 0;
- b->priv = 0;
-
+ /* find first non-empty block emitted before b */
for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->bin_size; --j);
- if (j >= 0) {
+ for (; j >= 0; --j) {
in = pc->bb_list[j];
/* check for no-op branches (BRA $PC+8) */
nv_nvi_delete(in->exit);
}
b->bin_pos = in->bin_pos + in->bin_size;
+
+ if (in->bin_size) /* no more no-op branches to b */
+ break;
}
pc->bb_list[pc->num_blocks++] = b;
nv50_inst_min_size(nvi->next) == 4 &&
inst_commutation_legal(nvi, nvi->next)) {
++n32;
- debug_printf("permuting: ");
- nv_print_instruction(nvi);
- nv_print_instruction(nvi->next);
nv_nvi_permute(nvi, nvi->next);
next = nvi;
} else {
}
if (!b->entry) {
- debug_printf("block %p is now empty\n", b);
+ NV50_DBGMSG(PROG_IR, "block %p is now empty\n", b);
} else
if (!b->exit->is_long) {
assert(n32);
assert(!b->entry || (b->exit && b->exit->is_long));
pc->bin_size += b->bin_size *= 4;
+}
- /* descend CFG */
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) /* Per-root body of nv_pc_exec_pass2.
+ * Runs nv_pass_flatten starting at @root, then hands nv_pc_pass_pre_emission
+ * to nv_pc_pass_in_order with @pc as its private argument.
+ * Always returns 0; the result of nv_pass_flatten is ignored.
+ */
+{
+ struct nv_pass pass; /* only .pc is assigned below; .n and .priv stay uninitialized */
- if (!b->out[0])
- return;
- if (!b->out[1] && ++(b->out[0]->priv) != b->out[0]->num_in)
- return;
+ pass.pc = pc;
+
+ pc->pass_seq++; /* bumped before the traversal, as before each pass in nv_pc_pass0;
+ * presumably a visit stamp for the block walk - confirm */
- for (j = 0; j < 2; ++j)
- if (b->out[j] && b->out[j] != b)
- nv_pc_pass_pre_emission(pc, b->out[j]);
+ nv_pass_flatten(&pass, root);
+
+ nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); /* replaces the removed hand-written descent over b->out[] */
+
+ return 0;
}
int
nv_pc_exec_pass2(struct nv_pc *pc)
{
- debug_printf("preparing %u blocks for emission\n", pc->num_blocks);
+ int i, ret; /* i: index into pc->root[]; ret: first non-zero nv_pc_pass2 result */
- pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
-
- pc->num_blocks = 0;
- nv_pc_pass_pre_emission(pc, pc->root);
+ NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);
+ pc->num_blocks = 0; /* will reorder bb_list: nv_pc_pass_pre_emission appends each block and counts it again */
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i) /* visits root[0] .. root[num_subroutines]; NULL entries are skipped */
+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+ return ret; /* stop at the first failing root and propagate its code */
return 0;
}
return;
assert(src0 && src1);
+ if (src1->value->reg.file == NV_FILE_IMM)
+ return;
+
if (is_cmem_load(src0->value->insn)) {
if (!is_cmem_load(src1->value->insn)) {
nvi->src[0] = src1;
- nvi->src[1] = src0;
- /* debug_printf("swapping cmem load to 1\n"); */
+ nvi->src[1] = src0;
+ /* debug_printf("swapping cmem load to 1\n"); */
}
} else
if (is_smem_load(src1->value->insn)) {
if (!is_smem_load(src0->value->insn)) {
nvi->src[0] = src1;
- nvi->src[1] = src0;
- /* debug_printf("swapping smem load to 0\n"); */
+ nvi->src[1] = src0;
+ /* debug_printf("swapping smem load to 0\n"); */
}
}
if (nvi->opcode == NV_OP_SET && nvi->src[0] != src0)
- nvi->set_cond = cc_swapped[nvi->set_cond];
+ nvi->set_cond = (nvi->set_cond & ~7) | cc_swapped[nvi->set_cond & 7];
}
-struct nv_pass {
- struct nv_pc *pc;
- int n;
- void *priv;
-};
-
static int
nv_pass_fold_stores(struct nv_pass *ctx, struct nv_basic_block *b)
{
- struct nv_instruction *nvi, *sti;
+ struct nv_instruction *nvi, *sti, *next;
int j;
- for (sti = b->entry; sti; sti = sti->next) {
- if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
- continue;
+ for (sti = b->entry; sti; sti = next) {
+ next = sti->next;
/* only handling MOV to $oX here */
+ if (!sti->def[0] || sti->def[0]->reg.file != NV_FILE_OUT)
+ continue;
if (sti->opcode != NV_OP_MOV && sti->opcode != NV_OP_STA)
continue;
nvi = sti->src[0]->value->insn;
- if (!nvi || nvi->opcode == NV_OP_PHI)
+ if (!nvi || nvi->opcode == NV_OP_PHI || nv_is_vector_op(nvi->opcode))
continue;
assert(nvi->def[0] == sti->src[0]->value);
+ if (nvi->opcode == NV_OP_SELECT)
+ continue;
if (nvi->def[0]->refc > 1)
continue;
- /* cannot MOV immediate to $oX */
- if (nvi->src[0]->value->reg.file == NV_FILE_IMM)
+ /* cannot write to $oX when using immediate */
+ for (j = 0; j < 4 && nvi->src[j]; ++j)
+ if (nvi->src[j]->value->reg.file == NV_FILE_IMM ||
+ nvi->src[j]->value->reg.file == NV_FILE_MEM_L)
+ break;
+ if (j < 4 && nvi->src[j])
continue;
nvi->def[0] = sti->def[0];
- sti->def[0] = NULL;
+ nvi->def[0]->insn = nvi;
nvi->fixed = sti->fixed;
- sti->fixed = 0;
+
+ nv_nvi_delete(sti);
}
DESCEND_ARBITRARY(j, nv_pass_fold_stores);
if (is_immd_move(ld) && nv50_nvi_can_use_imm(nvi, j)) {
nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
- debug_printf("folded immediate %i\n", ld->def[0]->n);
continue;
}
if (j == 0 && ld->src[4]) /* can't load shared mem */
continue;
- /* fold it ! */ /* XXX: ref->insn */
+ /* fold it ! */
nv_reference(ctx->pc, &nvi->src[j], ld->src[0]->value);
if (ld->src[4])
nv_reference(ctx->pc, &nvi->src[4], ld->src[4]->value);
+
+ if (!nv_nvi_refcount(ld))
+ nv_nvi_delete(ld);
}
}
DESCEND_ARBITRARY(j, nv_pass_fold_loads);
return 0;
}
+/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
nvi->src[1]->mod ^= NV_MOD_NEG;
}
- /* should not put any modifiers on NEG and ABS */
- assert(nvi->opcode != NV_MOD_NEG || !nvi->src[0]->mod);
- assert(nvi->opcode != NV_MOD_ABS || !nvi->src[0]->mod);
-
- for (j = 0; j < 4; ++j) {
- if (!nvi->src[j])
- break;
-
+ for (j = 0; j < 4 && nvi->src[j]; ++j) {
mi = nvi->src[j]->value->insn;
if (!mi)
continue;
if (mi->opcode == NV_OP_ABS) mod = NV_MOD_ABS;
else
continue;
+ assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
+
+ mod |= mi->src[0]->mod;
+
+ if (mi->flags_def || mi->flags_src)
+ continue;
- if (nvi->opcode == NV_OP_ABS)
+ if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
+ /* abs neg [abs] = abs */
mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
- else
- if (nvi->opcode == NV_OP_NEG && mod == NV_MOD_NEG) {
- nvi->opcode = NV_OP_MOV;
+ } else
+ if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
+ /* neg as opcode and modifier on same insn cannot occur */
+ /* neg neg abs = abs, neg neg = identity */
+ assert(j == 0);
+ if (mod & NV_MOD_ABS)
+ nvi->opcode = NV_OP_ABS;
+ else
+ if (nvi->flags_def)
+ nvi->opcode = NV_OP_CVT;
+ else
+ nvi->opcode = NV_OP_MOV;
mod = 0;
}
- if (!(nv50_supported_src_mods(nvi->opcode, j) & mod))
+ if ((nv50_supported_src_mods(nvi->opcode, j) & mod) != mod)
continue;
nv_reference(ctx->pc, &nvi->src[j], mi->src[0]->value);
if (nvi->opcode == NV_OP_SAT) {
mi = nvi->src[0]->value->insn;
- if ((mi->opcode == NV_OP_MAD) && !mi->flags_def) {
- mi->saturate = 1;
- mi->def[0] = nvi->def[0];
- nv_nvi_delete(nvi);
- }
+ if (mi->opcode != NV_OP_ADD && mi->opcode != NV_OP_MAD)
+ continue;
+ if (mi->flags_def || mi->def[0]->refc > 1)
+ continue;
+
+ mi->saturate = 1;
+ mi->def[0] = nvi->def[0];
+ mi->def[0]->insn = mi;
+ nv_nvi_delete(nvi);
}
}
DESCEND_ARBITRARY(j, nv_pass_lower_mods);
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
-static struct nv_value *
-find_immediate(struct nv_ref *ref)
+static void
+modifiers_apply(uint32_t *val, ubyte type, ubyte mod) /* Fold source modifiers into a raw 32-bit immediate.
+ *
+ * @val:  in/out, bit pattern of the immediate
+ * @type: NV_TYPE_F32 selects sign-bit manipulation; every other type is
+ *        treated as a 32-bit two's-complement integer
+ * @mod:  mask of NV_MOD_ABS / NV_MOD_NEG; other bits are ignored
+ *
+ * ABS is applied before NEG, so ABS|NEG yields the negated absolute value.
+ */
{
- struct nv_value *src;
+ if (mod & NV_MOD_ABS) {
+ if (type == NV_TYPE_F32)
+ *val &= 0x7fffffff; /* clear bit 31, the float sign bit */
+ else
+ if ((*val) & 0x80000000) /* unsigned literal: (1 << 31) overflows a signed int, which is undefined */
+ *val = ~(*val) + 1; /* two's-complement negate of a negative integer */
+ }
+ if (mod & NV_MOD_NEG) {
+ if (type == NV_TYPE_F32)
+ *val ^= 0x80000000; /* flip bit 31, the float sign bit */
+ else
+ *val = ~(*val) + 1; /* two's-complement negate */
+ }
+}
- if (!ref)
- return NULL;
+static INLINE uint
+modifiers_opcode(ubyte mod) /* Map a source modifier mask to the single opcode that has the same effect.
+ * Returns NV_OP_NOP when no such opcode exists; constant_operand treats that
+ * result as "do not rewrite the instruction".
+ */
+{
+ switch (mod) {
+ case NV_MOD_NEG: return NV_OP_NEG;
+ case NV_MOD_ABS: return NV_OP_ABS;
+ case 0: /* no modifier: a plain move */
+ return NV_OP_MOV;
+ default: /* any other value, e.g. NEG and ABS combined */
+ return NV_OP_NOP;
+ }
+}
+
+static void
+constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
+ struct nv_value *src0, struct nv_value *src1) /* Fold an instruction whose first two sources are both immediates.
+ *
+ * @src0 / @src1 are the immediate values found for nvi->src[0] / nvi->src[1].
+ * MUL, ADD and SUB become a MOV of the computed immediate. MAD becomes an
+ * ADD of its third source and the computed product, or a MOV of the third
+ * source when the product's bit pattern is 0. Any other opcode, or an
+ * instruction without def[0], is left untouched.
+ */
+{
+ struct nv_value *val;
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u0, u1, u; /* operands and result, viewed according to the result type */
+ ubyte type;
- src = ref->value;
- while (src->insn && src->insn->opcode == NV_OP_MOV) {
- assert(!src->insn->src[0]->mod);
- src = src->insn->src[0]->value;
+ if (!nvi->def[0])
+ return;
+ type = nvi->def[0]->reg.type; /* the result type selects the arithmetic used below */
+
+ u.u32 = 0;
+ u0.u32 = src0->reg.imm.u32;
+ u1.u32 = src1->reg.imm.u32;
+
+ modifiers_apply(&u0.u32, type, nvi->src[0]->mod); /* fold the source modifiers into the constants first */
+ modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+
+ switch (nvi->opcode) {
+ case NV_OP_MAD:
+ if (nvi->src[2]->value->reg.file != NV_FILE_GPR) /* only fold when the addend is in the GPR file */
+ return;
+ /* fall through */
+ case NV_OP_MUL:
+ switch (type) {
+ case NV_TYPE_F32: u.f32 = u0.f32 * u1.f32; break;
+ case NV_TYPE_U32: u.u32 = u0.u32 * u1.u32; break;
+ case NV_TYPE_S32: u.s32 = u0.s32 * u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case NV_OP_ADD:
+ switch (type) {
+ case NV_TYPE_F32: u.f32 = u0.f32 + u1.f32; break;
+ case NV_TYPE_U32: u.u32 = u0.u32 + u1.u32; break;
+ case NV_TYPE_S32: u.s32 = u0.s32 + u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ case NV_OP_SUB:
+ switch (type) {
+ case NV_TYPE_F32: u.f32 = u0.f32 - u1.f32; break;
+ case NV_TYPE_U32: u.u32 = u0.u32 - u1.u32; break;
+ case NV_TYPE_S32: u.s32 = u0.s32 - u1.s32; break;
+ default:
+ assert(0);
+ break;
+ }
+ break;
+ default: /* any other opcode is not folded */
+ return;
+ }
+
+ nvi->opcode = NV_OP_MOV;
+
+ val = new_value(pc, NV_FILE_IMM, type);
+
+ val->reg.imm.u32 = u.u32;
+
+ nv_reference(pc, &nvi->src[1], NULL);
+ nv_reference(pc, &nvi->src[0], val); /* NOTE(review): the modifiers were already folded into val; whether
+ * nv_reference clears the mod kept in this source slot is not visible here - confirm */
+
+ if (nvi->src[2]) { /* from MAD */
+ nvi->src[1] = nvi->src[0]; /* the folded product becomes the second ADD operand */
+ nvi->src[0] = nvi->src[2]; /* the former addend moves to the first slot */
+ nvi->src[2] = NULL;
+ nvi->opcode = NV_OP_ADD;
+
+ if (val->reg.imm.u32 == 0) { /* product has an all-zero bit pattern: a MOV of the addend is enough */
+ nvi->src[1] = NULL; /* NOTE(review): cleared directly, not through nv_reference(..., NULL) as above - confirm intended */
+ nvi->opcode = NV_OP_MOV;
+ }
}
- return (src->reg.file == NV_FILE_IMM) ? src : NULL;
}
static void
constant_operand(struct nv_pc *pc,
struct nv_instruction *nvi, struct nv_value *val, int s)
{
+ union {
+ float f32;
+ uint32_t u32;
+ int32_t s32;
+ } u;
int t = s ? 0 : 1;
+ uint op;
ubyte type;
if (!nvi->def[0])
return;
type = nvi->def[0]->reg.type;
+ u.u32 = val->reg.imm.u32;
+ modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+
switch (nvi->opcode) {
case NV_OP_MUL:
- if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 1.0f) ||
- (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 1)) {
- nvi->opcode = NV_OP_MOV;
+ if ((type == NV_TYPE_F32 && u.f32 == 1.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 1)) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
nv_reference(pc, &nvi->src[s], NULL);
- if (!s) {
- nvi->src[0] = nvi->src[1];
- nvi->src[1] = NULL;
- }
+ nvi->src[0] = nvi->src[t];
+ nvi->src[1] = NULL;
} else
- if ((type == NV_TYPE_F32 && val->reg.imm.f32 == 2.0f) ||
- (NV_TYPE_ISINT(type) && val->reg.imm.u32 == 2)) {
+ if ((type == NV_TYPE_F32 && u.f32 == 2.0f) ||
+ (NV_TYPE_ISINT(type) && u.u32 == 2)) {
nvi->opcode = NV_OP_ADD;
nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
+ nvi->src[s]->mod = nvi->src[t]->mod;
} else
- if (type == NV_TYPE_F32 && val->reg.imm.f32 == -1.0f) {
- nvi->opcode = NV_OP_NEG;
+ if (type == NV_TYPE_F32 && u.f32 == -1.0f) {
+ if (nvi->src[t]->mod & NV_MOD_NEG)
+ nvi->opcode = NV_OP_MOV;
+ else
+ nvi->opcode = NV_OP_NEG;
nv_reference(pc, &nvi->src[s], NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
} else
- if (type == NV_TYPE_F32 && val->reg.imm.f32 == -2.0f) {
+ if (type == NV_TYPE_F32 && u.f32 == -2.0f) {
nvi->opcode = NV_OP_ADD;
- assert(!nvi->src[s]->mod);
nv_reference(pc, &nvi->src[s], nvi->src[t]->value);
- nvi->src[t]->mod ^= NV_MOD_NEG;
- nvi->src[s]->mod |= NV_MOD_NEG;
+ nvi->src[s]->mod = (nvi->src[t]->mod ^= NV_MOD_NEG);
} else
- if (val->reg.imm.u32 == 0) {
+ if (u.u32 == 0) {
nvi->opcode = NV_OP_MOV;
nv_reference(pc, &nvi->src[t], NULL);
if (s) {
}
break;
case NV_OP_ADD:
- if (val->reg.imm.u32 == 0) {
- nvi->opcode = NV_OP_MOV;
+ if (u.u32 == 0) {
+ if ((op = modifiers_opcode(nvi->src[t]->mod)) == NV_OP_NOP)
+ break;
+ nvi->opcode = op;
nv_reference(pc, &nvi->src[s], NULL);
nvi->src[0] = nvi->src[t];
nvi->src[1] = NULL;
}
break;
+ case NV_OP_RCP:
+ u.f32 = 1.0f / u.f32;
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
+ case NV_OP_RSQ:
+ u.f32 = 1.0f / sqrtf(u.f32);
+ (val = new_value(pc, NV_FILE_IMM, NV_TYPE_F32))->reg.imm.f32 = u.f32;
+ nvi->opcode = NV_OP_MOV;
+ assert(s == 0);
+ nv_reference(pc, &nvi->src[0], val);
+ break;
default:
break;
}
+
+ if (nvi->opcode == NV_OP_MOV && nvi->flags_def) {
+ struct nv_instruction *cvt = new_instruction_at(pc, nvi, NV_OP_CVT);
+
+ nv_reference(pc, &cvt->src[0], nvi->def[0]);
+
+ cvt->flags_def = nvi->flags_def;
+ nvi->flags_def = NULL;
+ }
}
static int
next = nvi->next;
- if ((src = find_immediate(nvi->src[0])) != NULL)
- constant_operand(ctx->pc, nvi, src, 0);
- else
- if ((src = find_immediate(nvi->src[1])) != NULL)
- constant_operand(ctx->pc, nvi, src, 1);
+ src0 = nvcg_find_immediate(nvi->src[0]);
+ src1 = nvcg_find_immediate(nvi->src[1]);
+
+ if (src0 && src1)
+ constant_expression(ctx->pc, nvi, src0, src1);
+ else {
+ if (src0)
+ constant_operand(ctx->pc, nvi, src0, 0);
+ else
+ if (src1)
+ constant_operand(ctx->pc, nvi, src1, 1);
+ }
/* try to combine MUL, ADD into MAD */
if (nvi->opcode != NV_OP_ADD)
else
continue;
+ /* could have an immediate from above constant_* */
+ if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
+ continue;
+
nvi->opcode = NV_OP_MAD;
mod = nvi->src[(src == src0) ? 0 : 1]->mod;
nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
return 0;
}
-/*
-set $r2 g f32 $r2 $r3
-cvt abs rn f32 $r2 s32 $r2
-cvt f32 $c0 # f32 $r2
-e $c0 bra 0x80
-*/
-#if 0
-static int
-nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
-{
- /* XXX: easier in IR builder for now */
- return 0;
-}
-#endif
-
/* TODO: redundant store elimination */
struct load_record {
struct load_record *next;
- uint64_t data;
+ uint64_t data[2];
struct nv_value *value;
};
int alloc;
};
+/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
struct load_record **rec, *it;
struct nv_instruction *ld, *next;
- uint64_t data;
+ uint64_t data[2];
struct nv_value *val;
int j;
rec = NULL;
if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
- data = val->reg.id;
+ data[0] = val->reg.id;
+ data[1] = 0;
rec = &ctx->mem_v;
} else
if (ld->opcode == NV_OP_LDA) {
- data = val->reg.id;
+ data[0] = val->reg.id;
+ data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
if (val->reg.file >= NV_FILE_MEM_C(0) &&
val->reg.file <= NV_FILE_MEM_C(15))
rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
rec = &ctx->mem_l;
} else
if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
- data = val->reg.imm.u32;
+ data[0] = val->reg.imm.u32;
+ data[1] = 0;
rec = &ctx->imm;
}
continue;
for (it = *rec; it; it = it->next)
- if (it->data == data)
+ if (it->data[0] == data[0] && it->data[1] == data[1])
break;
if (it) {
-#if 1
- nvcg_replace_value(ctx->pc, ld->def[0], it->value);
-#else
- ld->opcode = NV_OP_MOV;
- nv_reference(ctx->pc, &ld->src[0], it->value);
-#endif
+ if (ld->def[0]->reg.id >= 0)
+ it->value = ld->def[0];
+ else
+ if (!ld->fixed)
+ nvcg_replace_value(ctx->pc, ld->def[0], it->value);
} else {
if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
continue;
it = &ctx->pool[ctx->alloc++];
it->next = *rec;
- it->data = data;
+ it->data[0] = data[0];
+ it->data[1] = data[1];
it->value = ld->def[0];
*rec = it;
}
int j;
struct nv_instruction *nvi, *next;
- for (nvi = b->entry; nvi; nvi = next) {
+ for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
next = nvi->next;
if (inst_cullable(nvi)) {
return 0;
}
+/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
+ * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
+ * BREAK and dummy ELSE block.
+ *
+ * Shape test only: @bb must have two successors. In the normal case each
+ * successor has exactly one successor and both lead to the same block. When
+ * the first edge of out[0] is a loop-leave edge, the second edge of out[0]
+ * must lead to the block that out[1] leads to.
+ */
static INLINE boolean
-bb_simple_if_endif(struct nv_basic_block *bb)
+bb_is_if_else_endif(struct nv_basic_block *bb)
+{
+ if (!bb->out[0] || !bb->out[1]) /* fewer than two successors: not a conditional split */
+ return FALSE;
+
+ if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) { /* IF branch leaves a loop through its first edge */
+ return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
+ !bb->out[1]->out[1]);
+ } else { /* plain IF/ELSE: both arms fall into the same successor */
+ return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
+ !bb->out[0]->out[1] &&
+ !bb->out[1]->out[1]);
+ }
+}
+
+/* predicate instructions and remove branch at the end
+ *
+ * Every instruction of @b that nvi_isnop() does not report as a no-op gets
+ * condition code @cc and @p as its flags source. If the last instruction of
+ * @b is a BRA it is deleted instead. A block without instructions is left
+ * untouched.
+ */
+static void
+predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
+ struct nv_value *p, ubyte cc)
{
- return (bb->out[0] && bb->out[1] &&
- bb->out[0]->out[0] == bb->out[1] &&
- !bb->out[0]->out[1]);
+ struct nv_instruction *nvi;
+
+ if (!b->entry)
+ return;
+ for (nvi = b->entry; nvi->next; nvi = nvi->next) { /* stops ON the last instruction without processing it */
+ if (!nvi_isnop(nvi)) {
+ nvi->cc = cc;
+ nv_reference(pc, &nvi->flags_src, p);
+ }
+ }
+
+ if (nvi->opcode == NV_OP_BRA) /* nvi is the last instruction here */
+ nv_nvi_delete(nvi);
+ else
+ if (!nvi_isnop(nvi)) { /* last instruction is not a branch: predicate it like the others */
+ nvi->cc = cc;
+ nv_reference(pc, &nvi->flags_src, p);
+ }
}
+/* NOTE: Run this after register allocation, we can just cut out the cflow
+ * instructions and hook the predicates to the conditional OPs if they are
+ * not using immediates; better than inserting SELECT to join definitions.
+ *
+ * NOTE: Should adapt prior optimization to make this possible more often.
+ *
+ * For a block @b accepted by bb_is_if_else_endif(): if every instruction in
+ * both successor blocks passes nv50_nvi_can_predicate() and each block has
+ * fewer than 12 instructions, both blocks are predicated on the flags value
+ * used by b->exit and the branch/join instructions are deleted. Then descends
+ * into the successors via DESCEND_ARBITRARY. Always returns 0.
+ */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
- int j;
+ struct nv_instruction *nvi;
+ struct nv_value *pred;
+ int i;
+ int n0 = 0, n1 = 0; /* instruction counts of out[0] / out[1] up to the first non-predicable one */
+
+ if (bb_is_if_else_endif(b)) {
+
+ NV50_DBGMSG(PROG_IR,
+ "pass_flatten: IF/ELSE/ENDIF construct at BB:%i\n", b->id);
- if (bb_simple_if_endif(b)) {
- ++ctx->n;
- debug_printf("nv_pass_flatten: total IF/ENDIF constructs: %i\n", ctx->n);
+ for (n0 = 0, nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
+ if (!nv50_nvi_can_predicate(nvi))
+ break;
+ if (!nvi) { /* first block is fully predicable: scan the second one. The #if below splices
+ * debug output into both arms; with it disabled only the scan loop remains. */
+ for (n1 = 0, nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
+ if (!nv50_nvi_can_predicate(nvi))
+ break;
+#if NV50_DEBUG & NV50_DEBUG_PROG_IR
+ if (nvi) {
+ debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+ }
+ } else {
+ debug_printf("cannot predicate: "); nv_print_instruction(nvi);
+#endif
+ }
+
+ if (!nvi && n0 < 12 && n1 < 12) { /* 12 as arbitrary limit */
+ assert(b->exit && b->exit->flags_src);
+ pred = b->exit->flags_src->value; /* the value the conditional branch tested */
+
+ predicate_instructions(ctx->pc, b->out[0], pred, NV_CC_NE | NV_CC_U);
+ predicate_instructions(ctx->pc, b->out[1], pred, NV_CC_EQ);
+
+ assert(b->exit && b->exit->opcode == NV_OP_BRA);
+ nv_nvi_delete(b->exit);
+
+ if (b->exit && b->exit->opcode == NV_OP_JOINAT) /* b->exit is re-read after the delete above */
+ nv_nvi_delete(b->exit);
+
+ i = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0; /* same edge choice as in bb_is_if_else_endif */
+
+ if ((nvi = b->out[0]->out[i]->entry)) { /* first instruction of the block both arms lead to */
+ nvi->is_join = 0;
+ if (nvi->opcode == NV_OP_JOIN)
+ nv_nvi_delete(nvi);
+ }
+ }
}
- DESCEND_ARBITRARY(j, nv_pass_flatten);
+ DESCEND_ARBITRARY(i, nv_pass_flatten);
return 0;
}
for (ir = entry; ir; ir = next) {
next = ir->next;
for (ik = entry; ik != ir; ik = ik->next) {
- if (ir->opcode != ik->opcode)
+ if (ir->opcode != ik->opcode || ir->fixed)
continue;
- if (ik->opcode == NV_OP_LDA ||
+ if (!ir->def[0] || !ik->def[0] ||
+ ik->opcode == NV_OP_LDA ||
ik->opcode == NV_OP_STA ||
ik->opcode == NV_OP_MOV ||
nv_is_vector_op(ik->opcode))
if (ik->src[4] || ir->src[4])
continue; /* don't mess with address registers */
+ if (ik->flags_src || ir->flags_src ||
+ ik->flags_def || ir->flags_def)
+ continue; /* and also not with flags, for now */
+
+ if (ik->def[0]->reg.file == NV_FILE_OUT ||
+ ir->def[0]->reg.file == NV_FILE_OUT ||
+ !values_equal(ik->def[0], ir->def[0]))
+ continue;
+
for (s = 0; s < 3; ++s) {
struct nv_value *a, *b;
return 0;
}
-int
-nv_pc_exec_pass0(struct nv_pc *pc)
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass_reld_elim *reldelim;
struct nv_pass pass;
pass.n = 0;
pass.pc = pc;
- pc->pass_seq++;
- ret = nv_pass_flatten(&pass, pc->root);
- if (ret)
- return ret;
-
/* Do this first, so we don't have to pay attention
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, pc->root);
+ ret = nv_pass_lower_arith(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_loads(&pass, pc->root);
+ ret = nv_pass_lower_mods(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_stores(&pass, pc->root);
+ ret = nv_pass_fold_loads(&pass, root);
if (ret)
return ret;
- reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
- reldelim->pc = pc;
pc->pass_seq++;
- ret = nv_pass_reload_elim(reldelim, pc->root);
- FREE(reldelim);
+ ret = nv_pass_fold_stores(&pass, root);
if (ret)
return ret;
- pc->pass_seq++;
- ret = nv_pass_cse(&pass, pc->root);
- if (ret)
- return ret;
+ if (pc->opt_reload_elim) {
+ reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+ reldelim->pc = pc;
+ pc->pass_seq++;
+ ret = nv_pass_reload_elim(reldelim, root);
+ FREE(reldelim);
+ if (ret)
+ return ret;
+ }
pc->pass_seq++;
- ret = nv_pass_lower_mods(&pass, pc->root);
+ ret = nv_pass_cse(&pass, root);
if (ret)
return ret;
do {
dce.removed = 0;
pc->pass_seq++;
- ret = nv_pass_dce(&dce, pc->root);
+ ret = nv_pass_dce(&dce, root);
if (ret)
return ret;
} while (dce.removed);
- ret = nv_pass_tex_mask(&pass, pc->root);
+ ret = nv_pass_tex_mask(&pass, root);
if (ret)
return ret;
return ret;
}
+
+int
+nv_pc_exec_pass0(struct nv_pc *pc) /* Run nv_pc_pass0 on every non-NULL entry of pc->root[0 .. num_subroutines].
+ * Returns the first non-zero result, or 0 if every call returned 0.
+ */
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) /* NULL roots are skipped */
+ return ret;
+ return 0;
+}