r600g/sb: separate bytecode decoding and parsing
authorVadim Girlin <vadimgirlin@gmail.com>
Sat, 11 May 2013 14:30:30 +0000 (18:30 +0400)
committerVadim Girlin <vadimgirlin@gmail.com>
Tue, 14 May 2013 13:36:25 +0000 (17:36 +0400)
Parsing and IR construction are required for optimization only;
they are unnecessary if we only need to print the shader dump.
This should make the new disassembler more tolerant of any new
features in the bytecode.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
src/gallium/drivers/r600/sb/sb_bc.h
src/gallium/drivers/r600/sb/sb_bc_builder.cpp
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/r600/sb/sb_core.cpp
src/gallium/drivers/r600/sb/sb_shader.cpp
src/gallium/drivers/r600/sb/sb_shader.h

index 9c6ed46d051c28309b94b6de6d49df1f8317e641..9f65098848df1d6437b0469818c7007cf1ae777a 100644 (file)
@@ -674,40 +674,39 @@ class bc_parser {
        typedef std::stack<region_node*> region_stack;
        region_stack loop_stack;
 
-       int enable_dump;
-       int optimize;
-
 public:
 
-       bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader,
-                 int dump_source, int optimize) :
+       bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
                ctx(sctx), dec(), bc(bc), pshader(pshader),
                dw(), bc_ndw(), max_cf(),
                sh(), error(), slots(), cgroup(),
-               cf_map(), loop_stack(), enable_dump(dump_source),
-               optimize(optimize) { }
+               cf_map(), loop_stack() { }
 
-       int parse();
+       int decode();
+       int prepare();
 
        shader* get_shader() { assert(!error); return sh; }
 
 private:
 
-       int parse_shader();
+       int decode_shader();
 
        int parse_decls();
 
-       int parse_cf(unsigned &i, bool &eop);
+       int decode_cf(unsigned &i, bool &eop);
 
-       int parse_alu_clause(cf_node *cf);
-       int parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
+       int decode_alu_clause(cf_node *cf);
+       int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
 
-       int parse_fetch_clause(cf_node *cf);
+       int decode_fetch_clause(cf_node *cf);
 
        int prepare_ir();
+       int prepare_alu_clause(cf_node *cf);
+       int prepare_alu_group(cf_node* cf, alu_group_node *g);
+       int prepare_fetch_clause(cf_node *cf);
+
        int prepare_loop(cf_node *c);
        int prepare_if(cf_node *c);
-       int prepare_alu_clause(cf_node *c);
 
 };
 
index b0c2e41c33f87cc9fa6bc1003619fde8dc7e9c92..f40e4694b8d738f672297a515f0cc078eb0a197a 100644 (file)
@@ -94,10 +94,6 @@ int bc_builder::build() {
                cf_pos = bb.get_pos();
        }
 
-       if (sh.enable_dump) {
-               bc_dump(sh, cerr, &bb).run();
-       }
-
        return 0;
 }
 
index 83292874d6bee7fd5012dcb5cbc05db6d79c550c..9f3ecc51cd9c907836124542a59de1577a01b150 100644 (file)
@@ -47,7 +47,7 @@ namespace r600_sb {
 
 using std::cerr;
 
-int bc_parser::parse() {
+int bc_parser::decode() {
 
        dw = bc->bytecode;
        bc_ndw = bc->ndw;
@@ -71,47 +71,27 @@ int bc_parser::parse() {
                        t = TARGET_FETCH;
        }
 
-       sh = new shader(ctx, t, bc->debug_id, enable_dump);
-       int r = parse_shader();
+       sh = new shader(ctx, t, bc->debug_id);
+       int r = decode_shader();
 
        delete dec;
 
-       if (r)
-               return r;
-
        sh->ngpr = bc->ngpr;
        sh->nstack = bc->nstack;
 
-       if (sh->target != TARGET_FETCH) {
-               sh->src_stats.ndw = bc->ndw;
-               sh->collect_stats(false);
-       }
-
-       if (enable_dump) {
-               bc_dump(*sh, cerr, bc->bytecode, bc_ndw).run();
-       }
-
-       if (!optimize)
-               return 0;
-
-       prepare_ir();
-
        return r;
 }
 
-int bc_parser::parse_shader() {
+int bc_parser::decode_shader() {
        int r = 0;
        unsigned i = 0;
        bool eop = false;
 
        sh->init();
 
-       if (pshader)
-               parse_decls();
-
        do {
                eop = false;
-               if ((r = parse_cf(i, eop)))
+               if ((r = decode_cf(i, eop)))
                        return r;
 
        } while (!eop || (i >> 1) <= max_cf);
@@ -119,34 +99,34 @@ int bc_parser::parse_shader() {
        return 0;
 }
 
-int bc_parser::parse_decls() {
-
-//     sh->prepare_regs(rs.bc.ngpr);
-
-       if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
+int bc_parser::prepare() {
+       int r = 0;
+       if ((r = parse_decls()))
+               return r;
+       if ((r = prepare_ir()))
+               return r;
+       return 0;
+}
 
-#if SB_NO_ARRAY_INFO
+int bc_parser::parse_decls() {
 
+       if (!pshader) {
                sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
+               return 0;
+       }
 
-#else
+       if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
 
                assert(pshader->num_arrays);
 
                if (pshader->num_arrays) {
-
                        for (unsigned i = 0; i < pshader->num_arrays; ++i) {
                                r600_shader_array &a = pshader->arrays[i];
                                sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
                        }
-
                } else {
                        sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
                }
-
-
-#endif
-
        }
 
        if (sh->target == TARGET_VS)
@@ -183,12 +163,10 @@ int bc_parser::parse_decls() {
                }
        }
 
-
        return 0;
 }
 
-
-int bc_parser::parse_cf(unsigned &i, bool &eop) {
+int bc_parser::decode_cf(unsigned &i, bool &eop) {
 
        int r;
 
@@ -210,18 +188,15 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
        cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
 
        if (flags & CF_ALU) {
-               if ((r = parse_alu_clause(cf)))
+               if ((r = decode_alu_clause(cf)))
                        return r;
        } else if (flags & CF_FETCH) {
-               if ((r = parse_fetch_clause(cf)))
+               if ((r = decode_fetch_clause(cf)))
                        return r;;
        } else if (flags & CF_EXP) {
                assert(!cf->bc.rw_rel);
        } else if (flags & (CF_STRM | CF_RAT)) {
                assert(!cf->bc.rw_rel);
-       } else if (cf->bc.op == CF_OP_CALL_FS) {
-               sh->init_call_fs(cf);
-               cf->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
        } else if (flags & CF_BRANCH) {
                if (cf->bc.addr > max_cf)
                        max_cf = cf->bc.addr;
@@ -232,7 +207,7 @@ int bc_parser::parse_cf(unsigned &i, bool &eop) {
        return 0;
 }
 
-int bc_parser::parse_alu_clause(cf_node* cf) {
+int bc_parser::decode_alu_clause(cf_node* cf) {
        unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;
 
        cf->subtype = NST_ALU_CLAUSE;
@@ -243,7 +218,7 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
        unsigned ng = 0;
 
        do {
-               parse_alu_group(cf, i, gcnt);
+               decode_alu_group(cf, i, gcnt);
                assert(gcnt <= cnt);
                cnt -= gcnt;
                ng++;
@@ -252,16 +227,17 @@ int bc_parser::parse_alu_clause(cf_node* cf) {
        return 0;
 }
 
-int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
+int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
        int r;
        alu_node *n;
        alu_group_node *g = sh->create_alu_group();
 
        cgroup = !cgroup;
        memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
-
        gcnt = 0;
 
+       unsigned literal_mask = 0;
+
        do {
                n = sh->create_alu();
                g->push_back(n);
@@ -280,11 +256,62 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 
        assert(n->bc.last);
 
-       unsigned literal_mask = 0;
+       for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
+               n = static_cast<alu_node*>(*I);
+
+               for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
+                       bc_alu_src &src = n->bc.src[k];
+                       if (src.sel == ALU_SRC_LITERAL) {
+                               literal_mask |= (1 << src.chan);
+                               src.value.u = dw[i + src.chan];
+                       }
+               }
+       }
+
+       unsigned literal_ndw = 0;
+       while (literal_mask) {
+               g->literals.push_back(dw[i + literal_ndw]);
+               literal_ndw += 1;
+               literal_mask >>= 1;
+       }
+
+       literal_ndw = (literal_ndw + 1) & ~1u;
+
+       i += literal_ndw;
+       gcnt += literal_ndw >> 1;
+
+       cf->push_back(g);
+       return 0;
+}
+
+int bc_parser::prepare_alu_clause(cf_node* cf) {
+
+       // loop over alu groups
+       for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+               assert(I->subtype == NST_ALU_GROUP);
+               alu_group_node *g = static_cast<alu_group_node*>(*I);
+               prepare_alu_group(cf, g);
+       }
+
+       return 0;
+}
+
+int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
+
+       alu_node *n;
+
+       cgroup = !cgroup;
+       memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
 
        for (node_iterator I = g->begin(), E = g->end();
                        I != E; ++I) {
                n = static_cast<alu_node*>(*I);
+
+               if (!sh->assign_slot(n, slots[cgroup])) {
+                       assert(!"alu slot assignment failed");
+                       return -1;
+               }
+
                unsigned src_count = n->bc.op_ptr->src_count;
 
                if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
@@ -340,10 +367,6 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
                        bc_alu_src &src = n->bc.src[s];
 
                        if (src.sel == ALU_SRC_LITERAL) {
-                               unsigned chan = src.chan;
-
-                               literal_mask |= (1 << chan);
-                               src.value.u = dw[i+chan];
                                n->src[s] = sh->get_const_value(src.value);
                        } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
                                unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
@@ -430,38 +453,52 @@ int bc_parser::parse_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
 
        if (p) {
                g->push_front(p);
-       }
 
-       unsigned literal_ndw = 0;
-       while (literal_mask) {
-               g->literals.push_back(dw[i + literal_ndw]);
-               literal_ndw += 1;
-               literal_mask >>= 1;
-       }
+               if (p->count() == 3 && ctx.is_cayman()) {
+                       // cayman's scalar instruction that can use 3 or 4 slots
 
-       literal_ndw = (literal_ndw + 1) & ~1u;
+                       // FIXME for simplicity we'll always add 4th slot,
+                       // but probably we might want to always remove 4th slot and make
+                       // sure that regalloc won't choose 'w' component for dst
 
-       i += literal_ndw;
-       gcnt += literal_ndw >> 1;
+                       alu_node *f = static_cast<alu_node*>(p->first);
+                       alu_node *a = sh->create_alu();
+                       a->src = f->src;
+                       a->dst.resize(f->dst.size());
+                       a->bc = f->bc;
+                       a->bc.slot = SLOT_W;
+                       p->push_back(a);
+               }
+       }
 
-       cf->push_back(g);
        return 0;
 }
 
-int bc_parser::parse_fetch_clause(cf_node* cf) {
+int bc_parser::decode_fetch_clause(cf_node* cf) {
        int r;
        unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;
 
        cf->subtype = NST_TEX_CLAUSE;
 
-       vvec grad_v, grad_h;
-
        while (cnt--) {
                fetch_node *n = sh->create_fetch();
                cf->push_back(n);
                if ((r = dec->decode_fetch(i, n->bc)))
                        return r;
 
+       }
+       return 0;
+}
+
+int bc_parser::prepare_fetch_clause(cf_node *cf) {
+
+       vvec grad_v, grad_h;
+
+       for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
+
+               fetch_node *n = static_cast<fetch_node*>(*I);
+               assert(n->is_valid());
+
                unsigned flags = n->bc.op_ptr->flags;
 
                unsigned vtx = flags & FF_VTX;
@@ -527,6 +564,7 @@ int bc_parser::parse_fetch_clause(cf_node* cf) {
 
                }
        }
+
        return 0;
 }
 
@@ -540,7 +578,14 @@ int bc_parser::prepare_ir() {
 
                unsigned flags = c->bc.op_ptr->flags;
 
-               if (flags & CF_LOOP_START) {
+               if (flags & CF_ALU) {
+                       prepare_alu_clause(c);
+               } else if (flags & CF_FETCH) {
+                       prepare_fetch_clause(c);
+               } else if (c->bc.op == CF_OP_CALL_FS) {
+                       sh->init_call_fs(c);
+                       c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
+               } else if (flags & CF_LOOP_START) {
                        prepare_loop(c);
                } else if (c->bc.op == CF_OP_JUMP) {
                        prepare_if(c);
@@ -560,10 +605,6 @@ int bc_parser::prepare_ir() {
                                dep->move(c->parent->first, c);
                        c->replace_with(dep);
                        sh->simplify_dep_rep(dep);
-               } else if (flags & CF_ALU && ctx.is_cayman()) {
-                       // postprocess cayman's 3-slot instructions (ex-trans-only)
-                       // FIXME it shouldn't be required with proper handling
-                       prepare_alu_clause(c);
                } else if (flags & CF_EXP) {
 
                        // unroll burst exports
@@ -735,40 +776,5 @@ int bc_parser::prepare_if(cf_node* c) {
        return 0;
 }
 
-int bc_parser::prepare_alu_clause(cf_node* c) {
-
-       // loop over alu groups
-       for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
-               assert(I->subtype == NST_ALU_GROUP);
-
-               alu_group_node *g = static_cast<alu_group_node*>(*I);
-
-               // loop over alu_group items
-               for (node_iterator I2 = g->begin(), E2 = g->end(); I2 != E2; ++I2) {
-                       if (I2->subtype != NST_ALU_PACKED_INST)
-                               continue;
-
-                       alu_packed_node *p = static_cast<alu_packed_node*>(*I2);
-
-                       if (p->count() == 3) {
-                               // cayman's scalar instruction that takes 3 or 4 slots
-
-                               // FIXME for simplicity we'll always add 4th slot,
-                               // but probably we might want to always remove 4th slot and make
-                               // sure that regalloc won't choose w component for dst
-
-                               alu_node *f = static_cast<alu_node*>(p->first);
-                               alu_node *a = sh->create_alu();
-                               a->src = f->src;
-                               a->dst.resize(f->dst.size());
-                               a->bc = f->bc;
-                               a->bc.slot = SLOT_W;
-                               p->push_back(a);
-                       }
-               }
-       }
-
-       return 0;
-}
 
 } // namespace r600_sb
index b919fa419d4e2c31cde9aeed00a7ddd1a60eb641..17a8b878e0f69b340fdb6c8ef279e37f76f14996 100644 (file)
@@ -94,7 +94,7 @@ void r600_sb_context_destroy(void * sctx) {
 int r600_sb_bytecode_process(struct r600_context *rctx,
                              struct r600_bytecode *bc,
                              struct r600_shader *pshader,
-                             int dump_source_bytecode,
+                             int dump_bytecode,
                              int optimize) {
        int r = 0;
        unsigned shader_id = bc->debug_id;
@@ -111,13 +111,29 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
        SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; );
 
-       bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize);
+       bc_parser parser(*ctx, bc, pshader);
 
-       if ((r = parser.parse())) {
-               assert(0);
+       if ((r = parser.decode())) {
+               assert(!"sb: bytecode decoding error");
                return r;
        }
 
+       shader *sh = parser.get_shader();
+
+       if (dump_bytecode) {
+               bc_dump(*sh, cerr, bc->bytecode, bc->ndw).run();
+       }
+
+       if (!optimize) {
+               delete sh;
+               return 0;
+       }
+
+       if (sh->target != TARGET_FETCH) {
+               sh->src_stats.ndw = bc->ndw;
+               sh->collect_stats(false);
+       }
+
        /* skip some shaders (use shaders from default backend)
         * dskip_start - range start, dskip_end - range_end,
         * e.g. start = 5, end = 6 means shaders 5 & 6
@@ -138,14 +154,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
                }
        }
 
-       shader *sh = parser.get_shader();
-       SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
-
-       if (!optimize) {
-               delete sh;
-               return 0;
+       if ((r = parser.prepare())) {
+               assert(!"sb: bytecode parsing error");
+               return r;
        }
 
+       SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
+
 #define SB_RUN_PASS(n, dump) \
        do { \
                r = n(*sh).run(); \
@@ -222,8 +237,13 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
                return r;
        }
 
+       bytecode &nbc = builder.get_bytecode();
+
+       if (dump_bytecode) {
+               bc_dump(*sh, cerr, &nbc).run();
+       }
+
        if (!sb_context::dry_run) {
-               bytecode &nbc = builder.get_bytecode();
 
                free(bc->bytecode);
                bc->ndw = nbc.ndw();
@@ -233,10 +253,9 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
                bc->ngpr = sh->ngpr;
                bc->nstack = sh->nstack;
        } else {
-               SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
+               SB_DUMP_STAT( cerr << "sb: dry run: optimized bytecode is not used\n"; );
        }
 
-
        if (sb_context::dump_stat) {
                int64_t t = os_time_get_nano() - time_start;
 
index 5944ba66f48b072580b8267bf1e6622291cd5848..f0665efb2caada2a733d1e26db5fdae420eb3166 100644 (file)
@@ -33,11 +33,11 @@ namespace r600_sb {
 
 using std::cerr;
 
-shader::shader(sb_context &sctx, shader_target t, unsigned id, bool dump)
+shader::shader(sb_context &sctx, shader_target t, unsigned id)
 : ctx(sctx), next_temp_value_index(temp_regid_offset),
   prep_regs_count(), pred_sels(),
   regions(), inputs(), undef(), val_pool(sizeof(value)),
-  pool(), all_nodes(), src_stats(), opt_stats(), errors(), enable_dump(dump),
+  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
   optimized(), id(id),
   coal(*this), bbs(),
   target(t), vt(ex), ex(*this), root(),
index b2e3837c4c0963c19f90414c60b4494e74f02a1a..5362e395e97aafc2eb5b3a648c87132068925c10 100644 (file)
@@ -271,7 +271,6 @@ public:
 
        error_map errors;
 
-       bool enable_dump;
        bool optimized;
 
        unsigned id;
@@ -296,7 +295,7 @@ public:
 
        unsigned ngpr, nstack;
 
-       shader(sb_context &sctx, shader_target t, unsigned id, bool dump);
+       shader(sb_context &sctx, shader_target t, unsigned id);
 
        ~shader();