namespace r600_sb {
+// Insert a padding ALU group containing a single NOP immediately before `b4`.
+//
+// This is the rv6xx "load AR" workaround: the caller (finalize_alu_group)
+// invokes it when ctx.r6xx_gpr_index_workaround is set and the previous ALU
+// group wrote a GPR that `b4` reads through relative (AR-indexed) addressing.
+// NOTE(review): presumably the R6xx hardware requires at least one
+// intervening instruction group between the GPR write / AR load and the
+// indexed read — confirm against the R600 hardware errata.
+//
+// b4: the ALU group that must be preceded by the padding NOP group.
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+
+ alu_group_node *g = sh.create_alu_group();
+ alu_node *a = sh.create_alu();
+
+ // A group must end with an instruction whose `last` bit is set; the lone
+ // NOP is therefore also the group terminator.
+ a->bc.set_op(ALU_OP0_NOP);
+ a->bc.last = 1;
+
+ g->push_back(a);
+ b4->insert_before(g);
+}
+
int bc_finalizer::run() {
run_on(sh.root);
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
+ // Update last_cf, but don't overwrite it if it's outside the current loop nest since
+ // it may point to a cf that is later in program order.
+ // The single parent level check is sufficient since finalize_loop() is processed in
+ // reverse order from innermost to outermost loop nest level.
+ if (!last_cf || last_cf->get_parent_region() == r) {
+ last_cf = loop_end;
+ }
+
loop_start->jump_after(loop_end);
loop_end->jump_after(loop_start);
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
cf_node *if_pop = sh.create_cf(CF_OP_POP);
+ if (!last_cf || last_cf->get_parent_region() == r) {
+ last_cf = if_pop;
+ }
if_pop->bc.pop_count = 1;
if_pop->jump_after(if_pop);
}
void bc_finalizer::run_on(container_node* c) {
-
+ node *prev_node = NULL;
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->is_alu_group()) {
- finalize_alu_group(static_cast<alu_group_node*>(n));
+ finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
} else {
if (n->is_alu_clause()) {
cf_node *c = static_cast<cf_node*>(n);
}
}
}
+ last_cf = c;
} else if (n->is_fetch_inst()) {
finalize_fetch(static_cast<fetch_node*>(n));
} else if (n->is_cf_inst()) {
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
+ prev_node = n;
}
}
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
alu_node *last = NULL;
+ alu_group_node *prev_g = NULL;
+ bool add_nop = false;
+ if (prev_node && prev_node->is_alu_group()) {
+ prev_g = static_cast<alu_group_node*>(prev_node);
+ }
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
alu_node *n = static_cast<alu_node*>(*I);
unsigned slot = n->bc.slot;
-
value *d = n->dst.empty() ? NULL : n->dst[0];
if (d && d->is_special_reg()) {
- assert(n->bc.op_ptr->flags & AF_MOVA);
+ assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
d = NULL;
}
assert(fdst.chan() == slot || slot == SLOT_TRANS);
}
- n->bc.dst_gpr = fdst.sel();
+ if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
+ n->bc.dst_gpr = fdst.sel();
n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
update_ngpr(n->bc.dst_gpr);
- finalize_alu_src(g, n);
+ add_nop |= finalize_alu_src(g, n, prev_g);
last = n;
}
+ if (add_nop) {
+ if (sh.get_ctx().r6xx_gpr_index_workaround) {
+ insert_rv6xx_load_ar_workaround(g);
+ }
+ }
last->bc.last = 1;
}
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
vvec &sv = a->src;
-
+ bool add_nop = false;
FBC_DUMP(
sblog << "finalize_alu_src: ";
dump::dump_op(a);
if (!v->rel->is_const()) {
src.rel = 1;
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+ if (prev && !add_nop) {
+ for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+ alu_node *pn = static_cast<alu_node*>(*pI);
+ if (pn->bc.dst_gpr == src.sel) {
+ add_nop = true;
+ break;
+ }
+ }
+ }
} else
src.rel = 0;
assert(!"unknown value kind");
break;
}
+ if (prev && !add_nop) {
+ for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+ alu_node *pn = static_cast<alu_node*>(*pI);
+ if (pn->bc.dst_rel) {
+ if (pn->bc.dst_gpr == src.sel) {
+ add_nop = true;
+ break;
+ }
+ }
+ }
+ }
}
while (si < 3) {
a->bc.src[si++].sel = 0;
}
+ return add_nop;
}
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
void bc_finalizer::emit_set_grad(fetch_node* f) {
- assert(f->src.size() == 12);
+ assert(f->src.size() == 12 || f->src.size() == 13);
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
unsigned arg_start = 0;
if (flags & FF_VTX) {
src_count = 1;
+ } else if (flags & FF_GDS) {
+ src_count = 2;
} else if (flags & FF_USEGRAD) {
emit_set_grad(f);
} else if (flags & FF_USE_TEXTURE_OFFSETS) {
for (unsigned i = 0; i < 4; ++i)
f->bc.dst_sel[i] = dst_swz[i];
+ if ((flags & FF_GDS) && reg == -1) {
+ f->bc.dst_sel[0] = SEL_MASK;
+ f->bc.dst_gpr = 0;
+ return ;
+ }
assert(reg >= 0);
if (reg >= 0)
mask |= (1 << chan);
}
- assert(reg >= 0 && mask);
-
if (reg >= 0)
update_ngpr(reg);
}
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
- unsigned sel = v->select.sel();
- unsigned bank = sel >> 12;
+ unsigned sel = v->select.kcache_sel();
+ unsigned bank = v->select.kcache_bank();
unsigned chan = v->select.chan();
static const unsigned kc_base[] = {128, 160, 256, 288};
cf_node *c = static_cast<cf_node*>(*I);
if (c->jump_after_target) {
+ if (c->jump_target->next == NULL) {
+ c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
+ if (last_cf == c->jump_target)
+ last_cf = static_cast<cf_node*>(c->jump_target->next);
+ }
c->jump_target = static_cast<cf_node*>(c->jump_target->next);
c->jump_after_target = false;
}