#define BCP_DUMP(q)
#endif
-extern "C" {
#include "r600_pipe.h"
#include "r600_shader.h"
-}
#include <stack>
namespace r600_sb {
-using std::cerr;
-
-int bc_parser::parse() {
+int bc_parser::decode() {
dw = bc->bytecode;
bc_ndw = bc->ndw;
if (pshader) {
switch (bc->type) {
case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
- case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
+ case TGSI_PROCESSOR_VERTEX:
+ t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
+ break;
+ case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
default: assert(!"unknown shader target"); return -1; break;
}
} else {
t = TARGET_FETCH;
}
- sh = new shader(ctx, t, bc->debug_id, enable_dump);
- int r = parse_shader();
+ sh = new shader(ctx, t, bc->debug_id);
+ sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);
- delete dec;
+ int r = decode_shader();
- if (r)
- return r;
+ delete dec;
sh->ngpr = bc->ngpr;
sh->nstack = bc->nstack;
- if (sh->target != TARGET_FETCH) {
- sh->src_stats.ndw = bc->ndw;
- sh->collect_stats(false);
- }
-
- if (enable_dump) {
- bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
- }
-
- if (!optimize)
- return 0;
-
- prepare_ir();
-
return r;
}
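The old parse() mixed decoding, stats collection, dumping, and IR preparation in one pass; the split above leaves decode() with only the bytecode-to-node translation. A minimal standalone sketch of the intended calling sequence (mock types only; the real driver entry point in sb_core.cpp is not reproduced here):

```cpp
#include <cassert>

// Mock of the two-phase flow: decode() is always required (it is enough for
// dumping and finalizing), prepare() builds the optimizer-facing IR on top
// of the decoded nodes.
struct mock_parser {
	bool decoded = false;
	int decode()  { decoded = true; return 0; }
	int prepare() { assert(decoded); return 0; }
};

int main() {
	mock_parser p;
	int r = p.decode();
	if (!r)
		r = p.prepare();	// skipped by the driver when optimization is off
	return r;
}
```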
-int bc_parser::parse_shader() {
+int bc_parser::decode_shader() {
int r = 0;
unsigned i = 0;
bool eop = false;
sh->init();
- if (pshader)
- parse_decls();
-
do {
eop = false;
- if ((r = parse_cf(i, eop)))
+ if ((r = decode_cf(i, eop)))
return r;
- } while (!eop || (i >> 1) <= max_cf);
+ } while (!eop || (i >> 1) < max_cf);
return 0;
}
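Each CF instruction occupies two dwords, so the dword cursor i maps to CF index i >> 1; max_cf tracks the highest branch target seen, which keeps decoding alive past an early end_of_program bit. A small worked trace of the tightened bound, with invented values:

```cpp
#include <cstdio>

int main() {
	unsigned i = 0, max_cf = 5;	// pretend a branch targets CF index 5
	bool eop = false;
	do {
		eop = (i >> 1) >= 3;	// pretend CFs from index 3 on set end_of_program
		i += 2;			// each CF instruction consumes two dwords
	} while (!eop || (i >> 1) < max_cf);
	std::printf("cursor stopped at CF index %u\n", i >> 1);	// prints 5
	return 0;
}
```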
-int bc_parser::parse_decls() {
-
-// sh->prepare_regs(rs.bc.ngpr);
+int bc_parser::prepare() {
+ int r = 0;
+ if ((r = parse_decls()))
+ return r;
+ if ((r = prepare_ir()))
+ return r;
+ return 0;
+}
- if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+int bc_parser::parse_decls() {
-#if SB_NO_ARRAY_INFO
+ if (!pshader) {
+ if (gpr_reladdr)
+ sh->add_gpr_array(0, bc->ngpr, 0x0F);
- sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
+ // compute shaders have some values preloaded in R0, R1
+ sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
+ sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
+ return 0;
+ }
-#else
+ if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
assert(pshader->num_arrays);
if (pshader->num_arrays) {
-
for (unsigned i = 0; i < pshader->num_arrays; ++i) {
r600_shader_array &a = pshader->arrays[i];
sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
}
-
} else {
sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
}
+ }
-
-#endif
-
+ // GS inputs can add indirect addressing
+ if (sh->target == TARGET_GS) {
+ if (pshader->num_arrays) {
+ for (unsigned i = 0; i < pshader->num_arrays; ++i) {
+ r600_shader_array &a = pshader->arrays[i];
+ sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
+ }
+ }
}
- if (sh->target == TARGET_VS)
+ if (sh->target == TARGET_VS || sh->target == TARGET_ES)
+ sh->add_input(0, 1, 0x0F);
+ else if (sh->target == TARGET_GS) {
sh->add_input(0, 1, 0x0F);
+ sh->add_input(1, 1, 0x0F);
+ }
bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
&& sh->target == TARGET_PS;
- unsigned linear = 0, persp = 0, centroid = 1;
+ bool ij_interpolators[6];
+ memset(ij_interpolators, 0, sizeof(ij_interpolators));
for (unsigned i = 0; i < pshader->ninput; ++i) {
r600_shader_io & in = pshader->input[i];
bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
if (ps_interp && in.spi_sid) {
- if (in.interpolate == TGSI_INTERPOLATE_LINEAR ||
- in.interpolate == TGSI_INTERPOLATE_COLOR)
- linear = 1;
- else if (in.interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
- persp = 1;
- if (in.centroid)
- centroid = 2;
+ int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
+ if (k >= 0)
+ ij_interpolators[k] |= true;
}
}
if (ps_interp) {
- unsigned mask = (1 << (2 * (linear + persp) * centroid)) - 1;
+ /* add the evergreen/cayman (egcm) ij interpolators to the live inputs */
+ unsigned num_ij = 0;
+ for (unsigned i = 0; i < Elements(ij_interpolators); i++) {
+ num_ij += ij_interpolators[i];
+ }
+
+ unsigned mask = (1 << (2 * num_ij)) - 1;
unsigned gpr = 0;
while (mask) {
sh->add_input(gpr, true, mask & 0x0F);
++gpr;
mask >>= 4;
}
}
-
return 0;
}
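The rewritten interpolator handling counts live ij pairs instead of deriving a size from linear/persp/centroid flags: every live interpolator contributes an i and a j value, packed four channels per GPR starting at GPR 0. A standalone walk of the same mask logic, assuming num_ij of 3:

```cpp
#include <cstdio>

int main() {
	unsigned num_ij = 3;				// e.g. three live interpolators
	unsigned mask = (1u << (2 * num_ij)) - 1;	// 6 live channels: 0x3F
	unsigned gpr = 0;
	while (mask) {
		std::printf("add_input(gpr=%u, preloaded=1, mask=0x%X)\n",
				gpr, mask & 0x0F);
		++gpr;					// four channels per GPR
		mask >>= 4;
	}
	return 0;					// gpr 0: 0xF, gpr 1: 0x3
}
```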
-
-int bc_parser::parse_cf(unsigned &i, bool &eop) {
+int bc_parser::decode_cf(unsigned &i, bool &eop) {
int r;
cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
if (flags & CF_ALU) {
- if ((r = parse_alu_clause(cf)))
+ if ((r = decode_alu_clause(cf)))
return r;
} else if (flags & CF_FETCH) {
- if ((r = parse_fetch_clause(cf)))
+ if ((r = decode_fetch_clause(cf)))
return r;
} else if (flags & CF_EXP) {
+ if (cf->bc.rw_rel)
+ gpr_reladdr = true;
assert(!cf->bc.rw_rel);
- } else if (flags & (CF_STRM | CF_RAT)) {
+ } else if (flags & CF_MEM) {
+ if (cf->bc.rw_rel)
+ gpr_reladdr = true;
assert(!cf->bc.rw_rel);
- } else if (cf->bc.op == CF_OP_CALL_FS) {
- sh->init_call_fs(cf);
- cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
} else if (flags & CF_BRANCH) {
if (cf->bc.addr > max_cf)
max_cf = cf->bc.addr;
return 0;
}
-int bc_parser::parse_alu_clause(cf_node* cf) {
+int bc_parser::decode_alu_clause(cf_node* cf) {
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
cf->subtype = NST_ALU_CLAUSE;
unsigned ng = 0;
do {
- parse_alu_group(cf, i, gcnt);
+ decode_alu_group(cf, i, gcnt);
assert(gcnt <= cnt);
cnt -= gcnt;
ng++;
return 0;
}
-int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
+int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
int r;
alu_node *n;
alu_group_node *g = sh->create_alu_group();
cgroup = !cgroup;
memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
-
gcnt = 0;
+ unsigned literal_mask = 0;
+
do {
n = sh->create_alu();
g->push_back(n);
assert(n->bc.last);
- unsigned literal_mask = 0;
+ for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
+ n = static_cast<alu_node*>(*I);
+
+ if (n->bc.dst_rel)
+ gpr_reladdr = true;
+
+ for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
+ bc_alu_src &src = n->bc.src[k];
+ if (src.rel)
+ gpr_reladdr = true;
+ if (src.sel == ALU_SRC_LITERAL) {
+ literal_mask |= (1 << src.chan);
+ src.value.u = dw[i + src.chan];
+ }
+ }
+ }
+
+ unsigned literal_ndw = 0;
+ while (literal_mask) {
+ g->literals.push_back(dw[i + literal_ndw]);
+ literal_ndw += 1;
+ literal_mask >>= 1;
+ }
+
+ literal_ndw = (literal_ndw + 1) & ~1u;
+
+ i += literal_ndw;
+ gcnt += literal_ndw >> 1;
+
+ cf->push_back(g);
+ return 0;
+}
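Literal handling moves into decode: literals live in the dwords following the group's last instruction and are addressed by source channel, so literal_mask keeps per-channel bits and gaps are preserved. A worked example of the padding arithmetic (channel usage invented):

```cpp
#include <cstdio>

int main() {
	// Channels x and z reference literals; y is unused but its dword is
	// still kept, because literals are addressed by channel.
	unsigned literal_mask = (1u << 0) | (1u << 2);	// 0b101
	unsigned literal_ndw = 0;
	while (literal_mask) {			// scan up to the highest used channel
		++literal_ndw;
		literal_mask >>= 1;
	}
	std::printf("raw literal dwords: %u\n", literal_ndw);	// 3
	literal_ndw = (literal_ndw + 1) & ~1u;	// literals come in dword pairs
	std::printf("padded: %u dwords, gcnt += %u\n",
			literal_ndw, literal_ndw >> 1);		// 4, 2
	return 0;
}
```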
+
+int bc_parser::prepare_alu_clause(cf_node* cf) {
+
+ // loop over alu groups
+ for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+ assert(I->subtype == NST_ALU_GROUP);
+ alu_group_node *g = static_cast<alu_group_node*>(*I);
+ prepare_alu_group(cf, g);
+ }
+
+ return 0;
+}
+
+int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
+
+ alu_node *n;
+
+ cgroup = !cgroup;
+ memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
for (node_iterator I = g->begin(), E = g->end();
I != E; ++I) {
n = static_cast<alu_node*>(*I);
+
+ if (!sh->assign_slot(n, slots[cgroup])) {
+ assert(!"alu slot assignment failed");
+ return -1;
+ }
+
unsigned src_count = n->bc.op_ptr->src_count;
if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
bc_alu_src &src = n->bc.src[s];
if (src.sel == ALU_SRC_LITERAL) {
- unsigned chan = src.chan;
-
- literal_mask |= (1 << chan);
- src.value.u = dw[i+chan];
n->src[s] = sh->get_const_value(src.value);
} else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
SLOT_TRANS : src.chan;
+
+ // XXX shouldn't happen, but the llvm backend uses PS on cayman
+ if (prev_slot == SLOT_TRANS && ctx.is_cayman())
+ prev_slot = SLOT_X;
+
alu_node *prev_alu = slots[pgroup][prev_slot];
assert(prev_alu);
if (p) {
g->push_front(p);
- }
- unsigned literal_ndw = 0;
- while (literal_mask) {
- g->literals.push_back(dw[i + literal_ndw]);
- literal_ndw += 1;
- literal_mask >>= 1;
- }
+ if (p->count() == 3 && ctx.is_cayman()) {
+ // cayman's scalar instruction that can use 3 or 4 slots
- literal_ndw = (literal_ndw + 1) & ~1u;
+ // FIXME: for simplicity we always add the 4th slot, but we might instead
+ // want to always remove the 4th slot and make sure that regalloc won't
+ // choose the 'w' component for dst
- i += literal_ndw;
- gcnt += literal_ndw >> 1;
+ alu_node *f = static_cast<alu_node*>(p->first);
+ alu_node *a = sh->create_alu();
+ a->src = f->src;
+ a->dst.resize(f->dst.size());
+ a->bc = f->bc;
+ a->bc.slot = SLOT_W;
+ p->push_back(a);
+ }
+ }
- cf->push_back(g);
return 0;
}
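prepare_alu_group resolves PV/PS reads against the previous group through the two-entry slots array: cgroup toggles on every group, so slots[!cgroup] is the prior group's slot table. A minimal sketch of that double-buffering, with strings standing in for alu_node pointers and slot indices SLOT_X..SLOT_TRANS assumed to be 0..4:

```cpp
#include <array>
#include <cstdio>

int main() {
	// slots[2][5] double buffer, zero-initialized.
	std::array<std::array<const char*, 5>, 2> slots{};
	unsigned cgroup = 0;

	// Group 0: toggle and fill the current buffer.
	cgroup = !cgroup;
	slots[cgroup].fill(nullptr);
	slots[cgroup][0] = "ADD in slot x";	// SLOT_X
	slots[cgroup][4] = "MUL in slot trans";	// SLOT_TRANS

	// Group 1: PV.x / PS reads resolve against the previous group's buffer.
	cgroup = !cgroup;
	slots[cgroup].fill(nullptr);
	unsigned pgroup = !cgroup;
	std::printf("PV.x -> %s\n", slots[pgroup][0]);
	std::printf("PS   -> %s\n", slots[pgroup][4]);
	return 0;
}
```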
-int bc_parser::parse_fetch_clause(cf_node* cf) {
+int bc_parser::decode_fetch_clause(cf_node* cf) {
int r;
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
cf->subtype = NST_TEX_CLAUSE;
- vvec grad_v, grad_h;
-
while (cnt--) {
fetch_node *n = sh->create_fetch();
cf->push_back(n);
if ((r = dec->decode_fetch(i, n->bc)))
return r;
+ if (n->bc.src_rel || n->bc.dst_rel)
+ gpr_reladdr = true;
+
+ }
+ return 0;
+}
+
+int bc_parser::prepare_fetch_clause(cf_node *cf) {
+
+ vvec grad_v, grad_h, texture_offsets;
+
+ for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+
+ fetch_node *n = static_cast<fetch_node*>(*I);
+ assert(n->is_valid());
unsigned flags = n->bc.op_ptr->flags;
sh->uses_gradients = true;
}
- if (flags & FF_SETGRAD) {
+ if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {
vvec *grad = NULL;
case FETCH_OP_SET_GRADIENTS_H:
grad = &grad_h;
break;
+ case FETCH_OP_SET_TEXTURE_OFFSETS:
+ grad = &texture_offsets;
+ break;
default:
assert(!"unexpected SET_GRAD instruction");
return -1;
(*grad)[s] = sh->get_const_value(1.0f);
}
} else {
-
+ // Fold source values for instructions with hidden target values into the
+ // instructions using them; the SET_* instructions are re-emitted later by bc_finalizer
if (flags & FF_USEGRAD) {
n->src.resize(12);
std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
+ } else if (flags & FF_USE_TEXTURE_OFFSETS) {
+ n->src.resize(8);
+ std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
} else {
n->src.resize(4);
}
}
}
+
return 0;
}
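The folding keeps the sampling instruction's coordinates in src[0..3] and appends the hidden operands behind them: gradients at src[4..11] for FF_USEGRAD, texture offsets at src[4..7] for FF_USE_TEXTURE_OFFSETS (indices are my reading of the resize() calls above). A standalone mirror of the std::copy layout, with strings instead of value nodes:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
	std::vector<std::string> src    = {"x", "y", "z", "w"};	// sample coords
	std::vector<std::string> grad_v = {"gvx", "gvy", "gvz", "gvw"};
	std::vector<std::string> grad_h = {"ghx", "ghy", "ghz", "ghw"};

	src.resize(12);		// coords + two folded gradient vectors
	std::copy(grad_v.begin(), grad_v.end(), src.begin() + 4);
	std::copy(grad_h.begin(), grad_h.end(), src.begin() + 8);

	for (unsigned i = 0; i < src.size(); ++i)
		std::printf("src[%u] = %s\n", i, src[i].c_str());
	return 0;
}
```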
unsigned flags = c->bc.op_ptr->flags;
- if (flags & CF_LOOP_START) {
+ if (flags & CF_ALU) {
+ prepare_alu_clause(c);
+ } else if (flags & CF_FETCH) {
+ prepare_fetch_clause(c);
+ } else if (c->bc.op == CF_OP_CALL_FS) {
+ sh->init_call_fs(c);
+ c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+ } else if (flags & CF_LOOP_START) {
prepare_loop(c);
} else if (c->bc.op == CF_OP_JUMP) {
prepare_if(c);
dep->move(c->parent->first, c);
c->replace_with(dep);
sh->simplify_dep_rep(dep);
- } else if (flags & CF_ALU && ctx.is_cayman()) {
- // postprocess cayman's 3-slot instructions (ex-trans-only)
- // FIXME it shouldn't be required with proper handling
- prepare_alu_clause(c);
} else if (flags & CF_EXP) {
// unroll burst exports
} while (1);
c->bc.end_of_program = eop;
- } else if (flags & (CF_STRM | CF_RAT)) {
+ } else if (flags & CF_MEM) {
unsigned burst_count = c->bc.burst_count;
unsigned eop = c->bc.end_of_program;
sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
}
- if ((flags & CF_RAT) && (c->bc.type & 1)) { // indexed write
+ if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
c->src.resize(8);
for(int s = 0; s < 3; ++s) {
c->src[4 + s] =
c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
}
+ if (flags & CF_EMIT) {
+ // Instruction implicitly depends on the prior EMIT_VERTEX / CUT_VERTEX
+ c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ if (sh->target == TARGET_ES) {
+ // For ES shaders this is an export
+ c->flags |= NF_DONT_KILL;
+ }
+ }
+
if (!burst_count--)
break;
c->bc.end_of_program = eop;
+ } else if (flags & CF_EMIT) {
+ /* quick peephole: fuse EMIT_VERTEX + CUT_VERTEX into EMIT_CUT_VERTEX */
+ cf_node *prev = static_cast<cf_node *>(c->prev);
+ if (c->bc.op == CF_OP_CUT_VERTEX &&
+ prev && prev->is_valid() &&
+ prev->bc.op == CF_OP_EMIT_VERTEX &&
+ c->bc.count == prev->bc.count) {
+ prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
+ prev->bc.end_of_program = c->bc.end_of_program;
+ c->remove();
+ }
+ else {
+ c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;
+
+ c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
+ }
}
}
}
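The peephole fuses an EMIT_VERTEX immediately followed by a CUT_VERTEX with a matching count into a single EMIT_CUT_VERTEX, inheriting end_of_program from the removed node. A self-contained sketch of the same transformation (the enum and node struct are illustrative stand-ins, not the r600_sb types):

```cpp
#include <cstdio>

enum cf_op { EMIT_VERTEX, CUT_VERTEX, EMIT_CUT_VERTEX };

struct cf { cf_op op; unsigned count; bool eop; };

// Returns true if c was folded into prev and can be removed.
bool fold_emit_cut(cf &prev, const cf &c) {
	if (c.op == CUT_VERTEX && prev.op == EMIT_VERTEX &&
			c.count == prev.count) {
		prev.op = EMIT_CUT_VERTEX;
		prev.eop = c.eop;	// inherit end_of_program
		return true;
	}
	return false;
}

int main() {
	cf prev{EMIT_VERTEX, 0, false}, cut{CUT_VERTEX, 0, true};
	bool folded = fold_emit_cut(prev, cut);
	std::printf("folded=%d op=%d eop=%d\n", folded, prev.op, prev.eop);
	return 0;
}
```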
int bc_parser::prepare_loop(cf_node* c) {
+ assert(c->bc.addr-1 < cf_map.size());
cf_node *end = cf_map[c->bc.addr - 1];
assert(end->bc.op == CF_OP_LOOP_END);
c->insert_before(reg);
rep->move(c, end->next);
+ reg->src_loop = true;
+
loop_stack.push(reg);
return 0;
}
int bc_parser::prepare_if(cf_node* c) {
+ assert(c->bc.addr-1 < cf_map.size());
cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
+ if (!end)
+ return 0; // not quite sure how this happens, malformed input?
+
BCP_DUMP(
- cerr << "parsing JUMP @" << c->bc.id;
- cerr << "\n";
+ sblog << "parsing JUMP @" << c->bc.id;
+ sblog << "\n";
);
if (end->bc.op == CF_OP_ELSE) {
BCP_DUMP(
- cerr << " found ELSE : ";
+ sblog << " found ELSE : ";
dump::dump_op(end);
- cerr << "\n";
+ sblog << "\n";
);
c_else = end;
end = cf_map[c_else->bc.addr];
} else {
BCP_DUMP(
- cerr << " no else\n";
+ sblog << " no else\n";
);
c_else = end;
if (c_else->parent != c->parent)
c_else = NULL;
- if (end->parent != c->parent)
+ if (end && end->parent != c->parent)
end = NULL;
region_node *reg = sh->create_region();
return 0;
}
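prepare_if resolves a JUMP through cf_map, so the new assert and NULL check guard both out-of-range addresses and empty map slots. A tiny standalone illustration of the guarded lookup (addresses invented):

```cpp
#include <cstdio>
#include <vector>

struct cf_node_t { unsigned addr; };

int main() {
	std::vector<cf_node_t*> cf_map(8, nullptr);	// CF index -> node, may be empty
	cf_node_t jump{6};				// a JUMP targeting CF index 6

	if (!(jump.addr - 1 < cf_map.size()))		// the added bounds check
		return -1;

	cf_node_t *end = cf_map[jump.addr];
	if (!end) {					// empty slot: tolerate malformed input
		std::printf("no node at CF %u, skipping\n", jump.addr);
		return 0;
	}
	return 0;
}
```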
-int bc_parser::prepare_alu_clause(cf_node* c) {
-
- // loop over alu groups
- for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
- assert(I->subtype == NST_ALU_GROUP);
-
- alu_group_node *g = static_cast<alu_group_node*>(*I);
-
- // loop over alu_group items
- for (node_iterator I2 = g->begin(), E2 = g->end(); I2 != E2; ++I2) {
- if (I2->subtype != NST_ALU_PACKED_INST)
- continue;
-
- alu_packed_node *p = static_cast<alu_packed_node*>(*I2);
-
- if (p->count() == 3) {
- // cayman's scalar instruction that takes 3 or 4 slots
-
- // FIXME for simplicity we'll always add 4th slot,
- // but probably we might want to always remove 4th slot and make
- // sure that regalloc won't choose w component for dst
-
- alu_node *f = static_cast<alu_node*>(p->first);
- alu_node *a = sh->create_alu();
- a->src = f->src;
- a->dst.resize(f->dst.size());
- a->bc = f->bc;
- a->bc.slot = SLOT_W;
- p->push_back(a);
- }
- }
- }
-
- return 0;
-}
} // namespace r600_sb