#define FBC_DUMP(q)
#endif
-#include <iostream>
-
#include "sb_bc.h"
-
#include "sb_shader.h"
-
#include "sb_pass.h"
namespace r600_sb {
-using std::cerr;
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+
+ alu_group_node *g = sh.create_alu_group();
+ alu_node *a = sh.create_alu();
+
+ a->bc.set_op(ALU_OP0_NOP);
+ a->bc.last = 1;
+
+ g->push_back(a);
+ b4->insert_before(g);
+}
int bc_finalizer::run() {
- regions_vec &rv = sh.get_regions();
+ run_on(sh.root);
+ regions_vec &rv = sh.get_regions();
for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
++I) {
region_node *r = *I;
r->expand();
}
- run_on(sh.root);
-
cf_peephole();
// workaround for some problems on r6xx/7xx
// add ALU NOP to each vertex shader
- if (!ctx.is_egcm() && sh.target == TARGET_VS) {
+ if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
alu_group_node *g = sh.create_alu_group();
last_cf = c;
}
- if (last_cf->bc.op_ptr->flags & CF_ALU) {
+ if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
last_cf = sh.create_cf(CF_OP_NOP);
sh.root->push_back(last_cf);
}
- if (ctx.is_cayman())
- last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
- else
+ if (ctx.is_cayman()) {
+ if (!last_cf) {
+ cf_node *c = sh.create_cf(CF_OP_CF_END);
+ sh.root->push_back(c);
+ } else
+ last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
+ } else
last_cf->bc.end_of_program = 1;
for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
void bc_finalizer::finalize_loop(region_node* r) {
+ update_nstack(r);
+
cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
+ // Update last_cf, but don't overwrite it if it's outside the current loop nest since
+ // it may point to a cf that is later in program order.
+ // The single parent level check is sufficient since finalize_loop() is processed in
+ // reverse order from innermost to outermost loop nest level.
+ if (!last_cf || last_cf->get_parent_region() == r) {
+ last_cf = loop_end;
+ }
+
loop_start->jump_after(loop_end);
loop_end->jump_after(loop_start);
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
cf_node *if_pop = sh.create_cf(CF_OP_POP);
+ if (!last_cf || last_cf->get_parent_region() == r) {
+ last_cf = if_pop;
+ }
if_pop->bc.pop_count = 1;
if_pop->jump_after(if_pop);
}
void bc_finalizer::run_on(container_node* c) {
-
+ node *prev_node = NULL;
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->is_alu_group()) {
- finalize_alu_group(static_cast<alu_group_node*>(n));
+ finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
} else {
- if (n->is_fetch_inst()) {
+ if (n->is_alu_clause()) {
+ cf_node *c = static_cast<cf_node*>(n);
+
+ if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
+ if (ctx.stack_workaround_8xx) {
+ region_node *r = c->get_parent_region();
+ if (r) {
+ unsigned ifs, loops;
+ unsigned elems = get_stack_depth(r, loops, ifs);
+ unsigned dmod1 = elems % ctx.stack_entry_size;
+ unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
+
+ if (elems && (!dmod1 || !dmod2))
+ c->flags |= NF_ALU_STACK_WORKAROUND;
+ }
+ } else if (ctx.stack_workaround_9xx) {
+ region_node *r = c->get_parent_region();
+ if (r) {
+ unsigned ifs, loops;
+ get_stack_depth(r, loops, ifs);
+ if (loops >= 2)
+ c->flags |= NF_ALU_STACK_WORKAROUND;
+ }
+ }
+ }
+ } else if (n->is_fetch_inst()) {
finalize_fetch(static_cast<fetch_node*>(n));
} else if (n->is_cf_inst()) {
finalize_cf(static_cast<cf_node*>(n));
- } else if (n->is_alu_clause()) {
-
- } else if (n->is_fetch_clause()) {
-
- } else {
- assert(!"unexpected node");
}
-
if (n->is_container())
run_on(static_cast<container_node*>(n));
}
+ prev_node = n;
}
}
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
alu_node *last = NULL;
+ alu_group_node *prev_g = NULL;
+ bool add_nop = false;
+ if (prev_node && prev_node->is_alu_group()) {
+ prev_g = static_cast<alu_group_node*>(prev_node);
+ }
for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
alu_node *n = static_cast<alu_node*>(*I);
unsigned slot = n->bc.slot;
-
value *d = n->dst.empty() ? NULL : n->dst[0];
if (d && d->is_special_reg()) {
- assert(n->bc.op_ptr->flags & AF_MOVA);
+ assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
d = NULL;
}
update_ngpr(n->bc.dst_gpr);
- finalize_alu_src(g, n);
+ add_nop |= finalize_alu_src(g, n, prev_g);
last = n;
}
+ if (add_nop) {
+ if (sh.get_ctx().r6xx_gpr_index_workaround) {
+ insert_rv6xx_load_ar_workaround(g);
+ }
+ }
last->bc.last = 1;
}
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
vvec &sv = a->src;
-
+ bool add_nop = false;
FBC_DUMP(
- cerr << "finalize_alu_src: ";
+ sblog << "finalize_alu_src: ";
dump::dump_op(a);
- cerr << "\n";
+ sblog << "\n";
);
unsigned si = 0;
if (!v->rel->is_const()) {
src.rel = 1;
update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+ if (prev && !add_nop) {
+ for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+ alu_node *pn = static_cast<alu_node*>(*pI);
+ if (pn->bc.dst_gpr == src.sel) {
+ add_nop = true;
+ break;
+ }
+ }
+ }
} else
src.rel = 0;
assert(!"unknown value kind");
break;
}
+ if (prev && !add_nop) {
+ for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+ alu_node *pn = static_cast<alu_node*>(*pI);
+ if (pn->bc.dst_rel) {
+ if (pn->bc.dst_gpr == src.sel) {
+ add_nop = true;
+ break;
+ }
+ }
+ }
+ }
}
while (si < 3) {
a->bc.src[si++].sel = 0;
}
+ return add_nop;
}
-void bc_finalizer::emit_set_grad(fetch_node* f) {
+void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
+{
+ int reg = -1;
- assert(f->src.size() == 12);
- unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
+ for (unsigned chan = 0; chan < 4; ++chan) {
- unsigned arg_start = 0;
+ dst.bc.dst_sel[chan] = SEL_MASK;
- for (unsigned op = 0; op < 2; ++op) {
- fetch_node *n = sh.create_fetch();
- n->bc.set_op(ops[op]);
+ unsigned sel = SEL_MASK;
- // FIXME extract this loop into a separate method and reuse it
+ value *v = src.src[arg_start + chan];
- int reg = -1;
+ if (!v || v->is_undef()) {
+ sel = SEL_MASK;
+ } else if (v->is_const()) {
+ literal l = v->literal_value;
+ if (l == literal(0))
+ sel = SEL_0;
+ else if (l == literal(1.0f))
+ sel = SEL_1;
+ else {
+ sblog << "invalid fetch constant operand " << chan << " ";
+ dump::dump_op(&src);
+ sblog << "\n";
+ abort();
+ }
- arg_start += 4;
+ } else if (v->is_any_gpr()) {
+ unsigned vreg = v->gpr.sel();
+ unsigned vchan = v->gpr.chan();
- for (unsigned chan = 0; chan < 4; ++chan) {
+ if (reg == -1)
+ reg = vreg;
+ else if ((unsigned)reg != vreg) {
+ sblog << "invalid fetch source operand " << chan << " ";
+ dump::dump_op(&src);
+ sblog << "\n";
+ abort();
+ }
- n->bc.dst_sel[chan] = SEL_MASK;
+ sel = vchan;
- unsigned sel = SEL_MASK;
+ } else {
+ sblog << "invalid fetch source operand " << chan << " ";
+ dump::dump_op(&src);
+ sblog << "\n";
+ abort();
+ }
- value *v = f->src[arg_start + chan];
+ dst.bc.src_sel[chan] = sel;
+ }
- if (!v || v->is_undef()) {
- sel = SEL_MASK;
- } else if (v->is_const()) {
- literal l = v->literal_value;
- if (l == literal(0))
- sel = SEL_0;
- else if (l == literal(1.0f))
- sel = SEL_1;
- else {
- cerr << "invalid fetch constant operand " << chan << " ";
- dump::dump_op(f);
- cerr << "\n";
- abort();
- }
+ if (reg >= 0)
+ update_ngpr(reg);
- } else if (v->is_any_gpr()) {
- unsigned vreg = v->gpr.sel();
- unsigned vchan = v->gpr.chan();
+ dst.bc.src_gpr = reg >= 0 ? reg : 0;
+}
- if (reg == -1)
- reg = vreg;
- else if ((unsigned)reg != vreg) {
- cerr << "invalid fetch source operand " << chan << " ";
- dump::dump_op(f);
- cerr << "\n";
- abort();
- }
+void bc_finalizer::emit_set_grad(fetch_node* f) {
- sel = vchan;
+ assert(f->src.size() == 12);
+ unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
- } else {
- cerr << "invalid fetch source operand " << chan << " ";
- dump::dump_op(f);
- cerr << "\n";
- abort();
- }
+ unsigned arg_start = 0;
- n->bc.src_sel[chan] = sel;
- }
+ for (unsigned op = 0; op < 2; ++op) {
+ fetch_node *n = sh.create_fetch();
+ n->bc.set_op(ops[op]);
- if (reg >= 0)
- update_ngpr(reg);
+ arg_start += 4;
- n->bc.src_gpr = reg >= 0 ? reg : 0;
+ copy_fetch_src(*n, *f, arg_start);
f->insert_before(n);
}
}
+void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
+ assert(f.src.size() == 8);
+
+ fetch_node *n = sh.create_fetch();
+
+ n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
+
+ copy_fetch_src(*n, f, 4);
+
+ f.insert_before(n);
+}
+
void bc_finalizer::finalize_fetch(fetch_node* f) {
int reg = -1;
src_count = 1;
} else if (flags & FF_USEGRAD) {
emit_set_grad(f);
+ } else if (flags & FF_USE_TEXTURE_OFFSETS) {
+ emit_set_texture_offsets(*f);
}
for (unsigned chan = 0; chan < src_count; ++chan) {
else if (l == literal(1.0f))
sel = SEL_1;
else {
- cerr << "invalid fetch constant operand " << chan << " ";
+ sblog << "invalid fetch constant operand " << chan << " ";
dump::dump_op(f);
- cerr << "\n";
+ sblog << "\n";
abort();
}
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
- cerr << "invalid fetch source operand " << chan << " ";
+ sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
- cerr << "\n";
+ sblog << "\n";
abort();
}
sel = vchan;
} else {
- cerr << "invalid fetch source operand " << chan << " ";
+ sblog << "invalid fetch source operand " << chan << " ";
dump::dump_op(f);
- cerr << "\n";
+ sblog << "\n";
abort();
}
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
- cerr << "invalid fetch dst operand " << chan << " ";
+ sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
- cerr << "\n";
+ sblog << "\n";
abort();
}
dst_swz[vchan] = sel;
} else {
- cerr << "invalid fetch dst operand " << chan << " ";
+ sblog << "invalid fetch dst operand " << chan << " ";
dump::dump_op(f);
- cerr << "\n";
+ sblog << "\n";
abort();
}
unsigned flags = c->bc.op_ptr->flags;
- if (flags & CF_CALL) {
- update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
- }
-
c->bc.end_of_program = 0;
last_cf = c;
else if (l == literal(1.0f))
sel = SEL_1;
else {
- cerr << "invalid export constant operand " << chan << " ";
+ sblog << "invalid export constant operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
- cerr << "invalid export source operand " << chan << " ";
+ sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
sel = vchan;
} else {
- cerr << "invalid export source operand " << chan << " ";
+ sblog << "invalid export source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
- cerr << "invalid source operand " << chan << " ";
+ sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
- cerr << "invalid source operand " << chan << " ";
+ sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
mask |= (1 << chan);
}
- assert(reg >= 0 && mask);
-
if (reg >= 0)
update_ngpr(reg);
c->bc.rw_gpr = reg >= 0 ? reg : 0;
c->bc.comp_mask = mask;
- if ((flags & CF_RAT) && (c->bc.type & 1)) {
+ if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
reg = -1;
continue;
if (!v->is_any_gpr() || v->gpr.chan() != chan) {
- cerr << "invalid source operand " << chan << " ";
+ sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
unsigned vreg = v->gpr.sel();
if (reg == -1)
reg = vreg;
else if ((unsigned)reg != vreg) {
- cerr << "invalid source operand " << chan << " ";
+ sblog << "invalid source operand " << chan << " ";
dump::dump_op(c);
- cerr << "\n";
+ sblog << "\n";
abort();
}
}
c->bc.index_gpr = reg >= 0 ? reg : 0;
}
-
-
-
- } else {
-
-#if 0
- if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
- c->bc.valid_pixel_mode = 1;
- }
-#endif
-
+ } else if (flags & CF_CALL) {
+ update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
}
}
ngpr = gpr + 1;
}
-void bc_finalizer::update_nstack(region_node* r, unsigned add) {
- unsigned loops = 0;
- unsigned ifs = 0;
+unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
+ unsigned &ifs, unsigned add) {
+ unsigned stack_elements = add;
+ bool has_non_wqm_push = (add != 0);
+ region_node *r = n->is_region() ?
+ static_cast<region_node*>(n) : n->get_parent_region();
+
+ loops = 0;
+ ifs = 0;
while (r) {
- if (r->is_loop())
+ if (r->is_loop()) {
++loops;
- else
+ } else {
++ifs;
-
+ has_non_wqm_push = true;
+ }
r = r->get_parent_region();
}
-
- unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
-
- // FIXME calculate more precisely
- if (ctx.is_evergreen()) {
- ++stack_elements;
- } else {
- stack_elements += 2;
- if (ctx.is_cayman())
+ stack_elements += (loops * ctx.stack_entry_size) + ifs;
+
+ // reserve additional elements in some cases
+ switch (ctx.hw_class) {
+ case HW_CLASS_R600:
+ case HW_CLASS_R700:
+ // If any non-WQM push is invoked, 2 elements should be reserved.
+ if (has_non_wqm_push)
+ stack_elements += 2;
+ break;
+ case HW_CLASS_CAYMAN:
+ // If any stack operation is invoked, 2 elements should be reserved
+ if (stack_elements)
+ stack_elements += 2;
+ break;
+ case HW_CLASS_EVERGREEN:
+ // According to the docs we need to reserve 1 element for each of the
+ // following cases:
+ // 1) non-WQM push is used with WQM/LOOP frames on stack
+ // 2) ALU_ELSE_AFTER is used at the point of max stack usage
+ // NOTE:
+ // It was found that the conditions above are not sufficient, there are
+ // other cases where we also need to reserve stack space, that's why
+ // we always reserve 1 stack element if we have non-WQM push on stack.
+ // Condition 2 is ignored for now because we don't use this instruction.
+ if (has_non_wqm_push)
++stack_elements;
+ break;
+ case HW_CLASS_UNKNOWN:
+ assert(0);
}
+ return stack_elements;
+}
+
+void bc_finalizer::update_nstack(region_node* r, unsigned add) {
+ unsigned loops = 0;
+ unsigned ifs = 0;
+ unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
- unsigned stack_entries = (stack_elements + 3) >> 2;
+ // XXX all chips expect this value to be computed using 4 as entry size,
+ // not the real entry size
+ unsigned stack_entries = (elems + 3) >> 2;
if (nstack < stack_entries)
nstack = stack_entries;
}
void bc_finalizer::cf_peephole() {
+ if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
+ for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
+ I = N) {
+ N = I; ++N;
+ cf_node *c = static_cast<cf_node*>(*I);
+
+ if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
+ (c->flags & NF_ALU_STACK_WORKAROUND)) {
+ cf_node *push = sh.create_cf(CF_OP_PUSH);
+ c->insert_before(push);
+ push->jump(c);
+ c->bc.set_op(CF_OP_ALU);
+ }
+ }
+ }
for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
I = N) {