#include <vector>
#include <stack>
+#include <ostream>
struct r600_bytecode;
struct r600_shader;
namespace r600_sb {
class hw_encoding_format;
+class node;
class alu_node;
class cf_node;
class fetch_node;
void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};
+// Per-shader instruction/resource statistics. Collected once before
+// optimization ("src" stats) and once after ("opt" stats), and also used
+// as a per-context accumulator across all processed shaders.
+struct shader_stats {
+	unsigned ndw;    // bytecode size in dwords (taken from r600_bytecode::ndw)
+	unsigned ngpr;   // GPRs used
+	unsigned nstack; // stack depth
+
+	unsigned cf; // clause instructions not included
+	unsigned alu;
+	unsigned alu_clauses;
+	unsigned fetch_clauses;
+	unsigned fetch;
+	unsigned alu_groups;
+
+	unsigned shaders; // number of shaders (for accumulated stats)
+
+	// Value-initialize every counter to zero.
+	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
+			fetch_clauses(), fetch(), alu_groups(), shaders() {}
+
+	// Recursively walk the IR subtree rooted at n, counting instructions,
+	// groups and clauses.
+	void collect(node *n);
+	// Add all counters from s into this object (also bumps 'shaders').
+	void accumulate(shader_stats &s);
+	// Print a one-line summary of the counters to o.
+	void dump(std::ostream &o);
+	// Print the relative change from this (baseline) to s, per counter.
+	void dump_diff(std::ostream &o, shader_stats &s);
+};
+
class sb_context {
public:
+ shader_stats src_stats, opt_stats;
+
r600_isa *isa;
sb_hw_chip hw_chip;
static unsigned dskip_end;
static unsigned dskip_mode;
- sb_context()
- : isa(0), hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
+ sb_context() : src_stats(), opt_stats(), isa(0),
+ hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
if (r)
return r;
+ sh->ngpr = bc->ngpr;
+ sh->nstack = bc->nstack;
+
+ if (sh->target != TARGET_FETCH) {
+ sh->src_stats.ndw = bc->ndw;
+ sh->collect_stats(false);
+ }
+
if (enable_dump) {
- sh->ngpr = bc->ngpr;
- sh->nstack = bc->nstack;
bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
}
int bc_parser::parse_alu_clause(cf_node* cf) {
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
+ cf->subtype = NST_ALU_CLAUSE;
+
cgroup = 0;
memset(slots[0], 0, 5*sizeof(slots[0][0]));
int r;
unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
+ cf->subtype = NST_TEX_CLAUSE;
+
vvec grad_v, grad_h;
while (cnt--) {
}
void r600_sb_context_destroy(void * sctx) {
-	if (sctx)
-		delete (sb_context*)sctx;
+	if (sctx) {
+		sb_context *ctx = static_cast<sb_context*>(sctx);
+
+		// Before tearing the context down, dump the accumulated
+		// per-context totals (and the src-vs-opt diff) when stat
+		// dumping is enabled.
+		if (sb_context::dump_stat) {
+			cerr << "context src stats: ";
+			ctx->src_stats.dump(cerr);
+			cerr << "context opt stats: ";
+			ctx->opt_stats.dump(cerr);
+			cerr << "context diff: ";
+			ctx->src_stats.dump_diff(cerr, ctx->opt_stats);
+		}
+
+		delete ctx;
+	}
}
int r600_sb_bytecode_process(struct r600_context *rctx,
SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
}
- delete sh;
if (sb_context::dump_stat) {
int64_t t = os_time_get_nano() - time_start;
cerr << "sb: processing shader " << shader_id << " done ( "
<< ((double)t)/1000000.0 << " ms ).\n";
+
+ sh->opt_stats.ndw = bc->ndw;
+ sh->collect_stats(true);
+
+ cerr << "src stats: ";
+ sh->src_stats.dump(cerr);
+ cerr << "opt stats: ";
+ sh->opt_stats.dump(cerr);
+ cerr << "diff: ";
+ sh->src_stats.dump_diff(cerr, sh->opt_stats);
}
+ delete sh;
return 0;
}
: ctx(sctx), next_temp_value_index(temp_regid_offset),
prep_regs_count(), pred_sels(),
regions(), inputs(), undef(), val_pool(sizeof(value)),
- pool(), all_nodes(), errors(), enable_dump(dump),
+ pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump),
optimized(), id(id),
coal(*this), bbs(),
target(t), vt(ex), ex(*this), root(),
return c;
}
+// Fill in the per-shader statistics (opt_stats when 'opt' is true,
+// src_stats otherwise) and fold them into the matching context-wide
+// accumulator. Does nothing unless stat dumping was requested.
+void shader::collect_stats(bool opt) {
+	if (!sb_context::dump_stat)
+		return;
+
+	shader_stats &dst = opt ? opt_stats : src_stats;
+	shader_stats &total = opt ? ctx.opt_stats : ctx.src_stats;
+
+	dst.shaders = 1;
+	dst.ngpr = ngpr;
+	dst.nstack = nstack;
+	dst.collect(root);
+
+	total.accumulate(dst);
+}
+
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
value_map::iterator I = vm.find(key);
if (I != vm.end())
}
}
+// Recursively walk the subtree rooted at 'n', bumping the counter that
+// matches each node kind. Clause/group counters are tracked separately
+// from 'cf' (see the field comments in shader_stats).
+void shader_stats::collect(node *n) {
+	if (n->is_alu_inst()) {
+		++alu;
+		return;
+	}
+
+	if (n->is_fetch_inst()) {
+		++fetch;
+		return;
+	}
+
+	if (!n->is_container())
+		return;
+
+	if (n->is_alu_group())
+		++alu_groups;
+	else if (n->is_alu_clause())
+		++alu_clauses;
+	else if (n->is_fetch_clause())
+		++fetch_clauses;
+	else if (n->is_cf_inst())
+		++cf;
+
+	// Recurse into the container's children (no-op when empty).
+	container_node *c = static_cast<container_node*>(n);
+	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I)
+		collect(*I);
+}
+
+// Add every counter from 's' into this object; used to build per-context
+// totals across all processed shaders.
+void shader_stats::accumulate(shader_stats& s) {
+	++shaders;
+
+	ndw += s.ndw;
+	ngpr += s.ngpr;
+	nstack += s.nstack;
+
+	cf += s.cf;
+	alu_clauses += s.alu_clauses;
+	alu_groups += s.alu_groups;
+	alu += s.alu;
+	fetch_clauses += s.fetch_clauses;
+	fetch += s.fetch;
+}
+
+// Print a single-line summary of the counters to 'o'. The shader count
+// is only shown for accumulated (multi-shader) stats.
+void shader_stats::dump(std::ostream& o) {
+	o << "dw:" << ndw;
+	o << ", gpr:" << ngpr;
+	o << ", stk:" << nstack;
+	o << ", alu groups:" << alu_groups;
+	o << ", alu clauses: " << alu_clauses;
+	o << ", alu:" << alu;
+	o << ", fetch:" << fetch;
+	o << ", fetch clauses:" << fetch_clauses;
+	o << ", cf:" << cf;
+
+	if (shaders > 1)
+		o << ", shaders:" << shaders;
+
+	o << "\n";
+}
+
+// Print the relative change from d1 to d2 as a truncated integer
+// percentage. A zero baseline has no meaningful ratio: print "N/A" when
+// the new value is non-zero, "0%" when both are zero.
+static void print_diff(std::ostream &o, unsigned d1, unsigned d2) {
+	if (!d1) {
+		o << (d2 ? "N/A" : "0%");
+		return;
+	}
+
+	int delta = (int)d2 - (int)d1;
+	o << delta * 100 / (int)d1 << "%";
+}
+
+// Print the relative differences between this (baseline) and 's' (new),
+// using the same field order and labels as dump().
+void shader_stats::dump_diff(std::ostream& o, shader_stats& s) {
+	struct field {
+		const char *label;
+		unsigned before, after;
+	} fields[] = {
+		{ "dw:",              ndw,           s.ndw },
+		{ ", gpr:",           ngpr,          s.ngpr },
+		{ ", stk:",           nstack,        s.nstack },
+		{ ", alu groups:",    alu_groups,    s.alu_groups },
+		{ ", alu clauses: ",  alu_clauses,   s.alu_clauses },
+		{ ", alu:",           alu,           s.alu },
+		{ ", fetch:",         fetch,         s.fetch },
+		{ ", fetch clauses:", fetch_clauses, s.fetch_clauses },
+		{ ", cf:",            cf,            s.cf },
+	};
+
+	for (unsigned i = 0; i < sizeof(fields) / sizeof(fields[0]); ++i) {
+		o << fields[i].label;
+		print_diff(o, fields[i].before, fields[i].after);
+	}
+
+	o << "\n";
+}
+
} // namespace r600_sb