r600g/sb: collect shader statistics
author Vadim Girlin <vadimgirlin@gmail.com>
Tue, 30 Apr 2013 16:58:52 +0000 (20:58 +0400)
committer Vadim Girlin <vadimgirlin@gmail.com>
Tue, 30 Apr 2013 17:50:48 +0000 (21:50 +0400)
Collects various statistical information for each shader
and total stats for contexts.

Printed with R600_DEBUG=sb,sbstat

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
src/gallium/drivers/r600/sb/sb_bc.h
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/r600/sb/sb_core.cpp
src/gallium/drivers/r600/sb/sb_shader.cpp
src/gallium/drivers/r600/sb/sb_shader.h

index 459dcac13cebb0ac3916017b2d493618ff15d849..0b9bc07085ca48d2c0f6ce8fbf0dc21944c09561 100644 (file)
@@ -34,6 +34,7 @@ extern "C" {
 
 #include <vector>
 #include <stack>
+#include <ostream>
 
 struct r600_bytecode;
 struct r600_shader;
@@ -41,6 +42,7 @@ struct r600_shader;
 namespace r600_sb {
 
 class hw_encoding_format;
+class node;
 class alu_node;
 class cf_node;
 class fetch_node;
@@ -456,10 +458,35 @@ struct bc_fetch {
        void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
 };
 
+struct shader_stats { // counters describing one shader, or a sum over several shaders
+       unsigned        ndw; // assigned by callers from bc->ndw
+       unsigned        ngpr; // copied from shader::ngpr by shader::collect_stats
+       unsigned        nstack; // copied from shader::nstack by shader::collect_stats
+
+       unsigned        cf; // clause instructions not included
+       unsigned        alu; // nodes for which is_alu_inst() is true
+       unsigned        alu_clauses; // containers for which is_alu_clause() is true
+       unsigned        fetch_clauses; // containers for which is_fetch_clause() is true
+       unsigned        fetch; // nodes for which is_fetch_inst() is true
+       unsigned        alu_groups; // containers for which is_alu_group() is true
+
+       unsigned        shaders;                // number of shaders (for accumulated stats)
+
+       shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), // every counter starts at zero
+                       fetch_clauses(), fetch(), alu_groups(), shaders() {}
+
+       void collect(node *n); // walk the tree under n and bump the matching counters
+       void accumulate(shader_stats &s); // add the counters of s to this total and count one more shader
+       void dump(std::ostream &o); // write all counters to o on a single line
+       void dump_diff(std::ostream &o, shader_stats &s); // write the percentage change from *this to s
+};
+
 class sb_context {
 
 public:
 
+       shader_stats src_stats, opt_stats;
+
        r600_isa *isa;
 
        sb_hw_chip hw_chip;
@@ -484,8 +511,8 @@ public:
        static unsigned dskip_end;
        static unsigned dskip_mode;
 
-       sb_context()
-               : isa(0), hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
+       sb_context() : src_stats(), opt_stats(), isa(0),
+                       hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
 
        int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
 
index fa9e2e0e38d32cafde5aef4601963bc43b4511bc..cc7552827b98106d1b8b2f7adf96bfec878bb997 100644 (file)
@@ -77,9 +77,15 @@ int bc_parser::parse() {
        if (r)
                return r;
 
+       sh->ngpr = bc->ngpr;
+       sh->nstack = bc->nstack;
+
+       if (sh->target != TARGET_FETCH) {
+               sh->src_stats.ndw = bc->ndw;
+               sh->collect_stats(false);
+       }
+
        if (enable_dump) {
-               sh->ngpr = bc->ngpr;
-               sh->nstack = bc->nstack;
                bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
        }
 
@@ -228,6 +234,8 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
 int bc_parser::parse_alu_clause(cf_node* cf) {
        unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
 
+       cf->subtype = NST_ALU_CLAUSE;
+
        cgroup = 0;
        memset(slots[0], 0, 5*sizeof(slots[0][0]));
 
@@ -443,6 +451,8 @@ int bc_parser::parse_fetch_clause(cf_node* cf) {
        int r;
        unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
+       cf->subtype = NST_TEX_CLAUSE;
+
        vvec grad_v, grad_h;
 
        while (cnt--) {
index bc200a4c9b6214ce00881bf8fa31c01644522777..9f81ed48e03f0bd6c0f6be9827fdba08c346af7c 100644 (file)
@@ -75,8 +75,20 @@ sb_context *r600_sb_context_create(struct r600_context *rctx) {
 }
 
 void r600_sb_context_destroy(void * sctx) { // destroys the sb_context passed in as an untyped pointer
-       if (sctx)
-               delete (sb_context*)sctx;
+       if (sctx) { // a null pointer is ignored
+               sb_context *ctx = static_cast<sb_context*>(sctx);
+
+               if (sb_context::dump_stat) { // print the context-wide totals before they are destroyed
+                       cerr << "context src stats: ";
+                       ctx->src_stats.dump(cerr);
+                       cerr << "context opt stats: ";
+                       ctx->opt_stats.dump(cerr);
+                       cerr << "context diff: ";
+                       ctx->src_stats.dump_diff(cerr, ctx->opt_stats); // percentage change from src totals to opt totals
+               }
+
+               delete ctx;
+       }
 }
 
 int r600_sb_bytecode_process(struct r600_context *rctx,
@@ -227,15 +239,25 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
                SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
        }
 
-       delete sh;
 
        if (sb_context::dump_stat) {
                int64_t t = os_time_get_nano() - time_start;
 
                cerr << "sb: processing shader " << shader_id << " done ( "
                                << ((double)t)/1000000.0 << " ms ).\n";
+
+               sh->opt_stats.ndw = bc->ndw;
+               sh->collect_stats(true);
+
+               cerr << "src stats: ";
+               sh->src_stats.dump(cerr);
+               cerr << "opt stats: ";
+               sh->opt_stats.dump(cerr);
+               cerr << "diff: ";
+               sh->src_stats.dump_diff(cerr, sh->opt_stats);
        }
 
+       delete sh;
        return 0;
 }
 
index 91f7c5dba5eb7b5b732beda3b66d5ea52693952d..a59f8a35525bfea5009f7da41a68984daf5cbf1d 100644 (file)
@@ -37,7 +37,7 @@ shader::shader(sb_context &sctx, shader_target t, unsigned id, bool dump)
 : ctx(sctx), next_temp_value_index(temp_regid_offset),
   prep_regs_count(), pred_sels(),
   regions(), inputs(), undef(), val_pool(sizeof(value)),
-  pool(), all_nodes(), errors(), enable_dump(dump),
+  pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump),
   optimized(), id(id),
   coal(*this), bbs(),
   target(t), vt(ex), ex(*this), root(),
@@ -557,6 +557,23 @@ alu_node* shader::clone(alu_node* n) {
        return c;
 }
 
+/*
+ * Fill in this shader's source stats (opt == false) or optimized stats
+ * (opt == true) and add them to the matching context-wide total.
+ * Does nothing unless stat dumping is enabled.
+ */
+void shader::collect_stats(bool opt) {
+       if (sb_context::dump_stat) {
+               shader_stats &own = opt ? opt_stats : src_stats;
+               shader_stats &total = opt ? ctx.opt_stats : ctx.src_stats;
+
+               own.shaders = 1;
+               own.ngpr = ngpr;
+               own.nstack = nstack;
+               own.collect(root);
+
+               total.accumulate(own);
+       }
+}
+
 value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
        value_map::iterator I = vm.find(key);
        if (I != vm.end())
@@ -657,4 +674,78 @@ sched_queue_id shader::get_queue_id(node* n) {
        }
 }
 
+/*
+ * Count the node n and, if it is a container, everything nested inside it.
+ * ALU and fetch instructions are counted and not descended into.
+ */
+void shader_stats::collect(node *n) {
+       if (n->is_alu_inst()) {
+               ++alu;
+               return;
+       }
+
+       if (n->is_fetch_inst()) {
+               ++fetch;
+               return;
+       }
+
+       if (!n->is_container())
+               return;
+
+       // A container is classified at most once; the first matching test wins.
+       if (n->is_alu_group())
+               ++alu_groups;
+       else if (n->is_alu_clause())
+               ++alu_clauses;
+       else if (n->is_fetch_clause())
+               ++fetch_clauses;
+       else if (n->is_cf_inst())
+               ++cf;
+
+       container_node *c = static_cast<container_node*>(n);
+       if (c->empty())
+               return;
+
+       for (node_iterator child = c->begin(), last = c->end(); child != last; ++child)
+               collect(*child);
+}
+
+/*
+ * Add every counter of s to this running total. Each call counts as
+ * exactly one more shader, regardless of the value of s.shaders.
+ */
+void shader_stats::accumulate(shader_stats& s) {
+       shaders += 1;
+
+       // sizes and resource usage
+       ndw += s.ndw;
+       ngpr += s.ngpr;
+       nstack += s.nstack;
+
+       // instruction and clause counts
+       cf += s.cf;
+       alu += s.alu;
+       alu_clauses += s.alu_clauses;
+       fetch_clauses += s.fetch_clauses;
+       fetch += s.fetch;
+       alu_groups += s.alu_groups;
+}
+
+/*
+ * Write all counters to o as one comma-separated line. The shader count
+ * is only printed for accumulated stats (more than one shader).
+ */
+void shader_stats::dump(std::ostream& o) {
+       o << "dw:" << ndw;
+       o << ", gpr:" << ngpr;
+       o << ", stk:" << nstack;
+       o << ", alu groups:" << alu_groups;
+       o << ", alu clauses: " << alu_clauses;
+       o << ", alu:" << alu;
+       o << ", fetch:" << fetch;
+       o << ", fetch clauses:" << fetch_clauses;
+       o << ", cf:" << cf;
+
+       if (shaders > 1) {
+               o << ", shaders:" << shaders;
+       }
+
+       o << "\n";
+}
+
+/*
+ * Print the relative change from d1 to d2 as a whole percentage.
+ *
+ * The quotient is truncated toward zero. "N/A" is printed when d1 is zero
+ * but d2 is not (there is no meaningful percentage), and "0%" when both
+ * are zero. The arithmetic is done in long long so that large accumulated
+ * counters cannot overflow an int when the difference is multiplied by 100.
+ */
+static void print_diff(std::ostream &o, unsigned d1, unsigned d2) {
+       if (d1) {
+               const long long before = d1, after = d2;
+               o << (after - before) * 100 / before << "%";
+       } else if (d2)
+               o << "N/A";
+       else
+               o << "0%";
+}
+
+/*
+ * Write, for every counter, the percentage change from this set of stats
+ * to s, as one comma-separated line in the same order as dump().
+ */
+void shader_stats::dump_diff(std::ostream& o, shader_stats& s) {
+       const struct {
+               const char *label;
+               unsigned before;
+               unsigned after;
+       } rows[] = {
+               { "dw:", ndw, s.ndw },
+               { ", gpr:", ngpr, s.ngpr },
+               { ", stk:", nstack, s.nstack },
+               { ", alu groups:", alu_groups, s.alu_groups },
+               { ", alu clauses: ", alu_clauses, s.alu_clauses },
+               { ", alu:", alu, s.alu },
+               { ", fetch:", fetch, s.fetch },
+               { ", fetch clauses:", fetch_clauses, s.fetch_clauses },
+               { ", cf:", cf, s.cf },
+       };
+
+       for (unsigned i = 0; i < sizeof(rows) / sizeof(rows[0]); ++i) {
+               o << rows[i].label;
+               print_diff(o, rows[i].before, rows[i].after);
+       }
+
+       o << "\n";
+}
+
 } // namespace r600_sb
index 039d1fa5c845e1382f1bb811d445238acc0e1a9b..bc6e582c09b065e2a32f86aac62a87178d6e8e67 100644 (file)
@@ -267,6 +267,8 @@ class shader {
        std::vector<node*> all_nodes;
 
 public:
+       shader_stats src_stats, opt_stats;
+
        error_map errors;
 
        bool enable_dump;
@@ -394,6 +396,8 @@ public:
 
        sb_value_pool& get_value_pool() { return val_pool; }
 
+       void collect_stats(bool opt);
+
 private:
        value* create_value(value_kind k, sel_chan regid, unsigned ver);
        value* get_value(value_kind kind, sel_chan id,