r600/sb: update last_cf if alu is the last clause
[mesa.git] / src / gallium / drivers / r600 / sb / sb_bc_finalize.cpp
index 355eb63810c4b6c82be75f58b6efc3e83a56a705..2ec4db624a585518fae5b0af54463537031f9f5c 100644 (file)
 
 namespace r600_sb {
 
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+       // Places a freshly created ALU group holding a single NOP directly before group b4.
+       alu_group_node *g = sh.create_alu_group();
+       alu_node *a = sh.create_alu();
+       // The NOP is the only instruction of the new group, so it also carries the "last" flag.
+       a->bc.set_op(ALU_OP0_NOP);
+       a->bc.last = 1;
+       // Attach the instruction to the new group, then link the group in ahead of b4.
+       g->push_back(a);
+       b4->insert_before(g);
+}
+
 int bc_finalizer::run() {
 
        run_on(sh.root);
@@ -63,7 +75,7 @@ int bc_finalizer::run() {
 
        // workaround for some problems on r6xx/7xx
        // add ALU NOP to each vertex shader
-       if (!ctx.is_egcm() && sh.target == TARGET_VS) {
+       if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
                cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
 
                alu_group_node *g = sh.create_alu_group();
@@ -83,14 +95,18 @@ int bc_finalizer::run() {
                last_cf = c;
        }
 
-       if (last_cf->bc.op_ptr->flags & CF_ALU) {
+       if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
                last_cf = sh.create_cf(CF_OP_NOP);
                sh.root->push_back(last_cf);
        }
 
-       if (ctx.is_cayman())
-               last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
-       else
+       if (ctx.is_cayman()) {
+               if (!last_cf) {
+                       cf_node *c = sh.create_cf(CF_OP_CF_END);
+                       sh.root->push_back(c);
+               } else
+                       last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
+       } else
                last_cf->bc.end_of_program = 1;
 
        for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
@@ -106,9 +122,19 @@ int bc_finalizer::run() {
 
 void bc_finalizer::finalize_loop(region_node* r) {
 
+       update_nstack(r);
+
        cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
        cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
 
+       // Update last_cf, but don't overwrite it if it's outside the current loop nest since
+       // it may point to a cf that is later in program order.
+       // The single parent level check is sufficient since finalize_loop() is processed in
+       // reverse order from innermost to outermost loop nest level.
+       if (!last_cf || last_cf->get_parent_region() == r) {
+               last_cf = loop_end;
+       }
+
        loop_start->jump_after(loop_end);
        loop_end->jump_after(loop_start);
 
@@ -173,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
                cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
                cf_node *if_pop = sh.create_cf(CF_OP_POP);
 
+               if (!last_cf || last_cf->get_parent_region() == r) {
+                       last_cf = if_pop;
+               }
                if_pop->bc.pop_count = 1;
                if_pop->jump_after(if_pop);
 
@@ -205,12 +234,12 @@ void bc_finalizer::finalize_if(region_node* r) {
 }
 
 void bc_finalizer::run_on(container_node* c) {
-
+       node *prev_node = NULL;
        for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                node *n = *I;
 
                if (n->is_alu_group()) {
-                       finalize_alu_group(static_cast<alu_group_node*>(n));
+                       finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
                } else {
                        if (n->is_alu_clause()) {
                                cf_node *c = static_cast<cf_node*>(n);
@@ -237,6 +266,7 @@ void bc_finalizer::run_on(container_node* c) {
                                                }
                                        }
                                }
+                               last_cf = c;
                        } else if (n->is_fetch_inst()) {
                                finalize_fetch(static_cast<fetch_node*>(n));
                        } else if (n->is_cf_inst()) {
@@ -245,21 +275,26 @@ void bc_finalizer::run_on(container_node* c) {
                        if (n->is_container())
                                run_on(static_cast<container_node*>(n));
                }
+               prev_node = n;
        }
 }
 
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
 
        alu_node *last = NULL;
+       alu_group_node *prev_g = NULL;
+       bool add_nop = false;
+       if (prev_node && prev_node->is_alu_group()) {
+               prev_g = static_cast<alu_group_node*>(prev_node);
+       }
 
        for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
                alu_node *n = static_cast<alu_node*>(*I);
                unsigned slot = n->bc.slot;
-
                value *d = n->dst.empty() ? NULL : n->dst[0];
 
                if (d && d->is_special_reg()) {
-                       assert(n->bc.op_ptr->flags & AF_MOVA);
+                       assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
                        d = NULL;
                }
 
@@ -269,7 +304,8 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
                        assert(fdst.chan() == slot || slot == SLOT_TRANS);
                }
 
-               n->bc.dst_gpr = fdst.sel();
+               if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
+                       n->bc.dst_gpr = fdst.sel();
                n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;
 
 
@@ -293,17 +329,22 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
 
                update_ngpr(n->bc.dst_gpr);
 
-               finalize_alu_src(g, n);
+               add_nop |= finalize_alu_src(g, n, prev_g);
 
                last = n;
        }
 
+       if (add_nop) {
+               if (sh.get_ctx().r6xx_gpr_index_workaround) {
+                       insert_rv6xx_load_ar_workaround(g);
+               }
+       }
        last->bc.last = 1;
 }
 
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
        vvec &sv = a->src;
-
+       bool add_nop = false;
        FBC_DUMP(
                sblog << "finalize_alu_src: ";
                dump::dump_op(a);
@@ -330,6 +371,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        if (!v->rel->is_const()) {
                                src.rel = 1;
                                update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
+                               if (prev && !add_nop) {
+                                       for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+                                               alu_node *pn = static_cast<alu_node*>(*pI);
+                                               if (pn->bc.dst_gpr == src.sel) {
+                                                       add_nop = true;
+                                                       break;
+                                               }
+                                       }
+                               }
                        } else
                                src.rel = 0;
 
@@ -387,88 +437,115 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        assert(!"unknown value kind");
                        break;
                }
+               if (prev && !add_nop) {
+                       for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+                               alu_node *pn = static_cast<alu_node*>(*pI);
+                               if (pn->bc.dst_rel) {
+                                       if (pn->bc.dst_gpr == src.sel) {
+                                               add_nop = true;
+                                               break;
+                                       }
+                               }
+                       }
+               }
        }
 
        while (si < 3) {
                a->bc.src[si++].sel = 0;
        }
+       return add_nop;
 }
 
-void bc_finalizer::emit_set_grad(fetch_node* f) {
+void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
+{ // Fills dst's source selects and source GPR from the four operands src.src[arg_start .. arg_start + 3].
+       int reg = -1; // GPR shared by the register operands; stays -1 until the first one is seen
 
-       assert(f->src.size() == 12);
-       unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
+       for (unsigned chan = 0; chan < 4; ++chan) {
 
-       unsigned arg_start = 0;
+               dst.bc.dst_sel[chan] = SEL_MASK; // every destination channel of dst is set to SEL_MASK
 
-       for (unsigned op = 0; op < 2; ++op) {
-               fetch_node *n = sh.create_fetch();
-               n->bc.set_op(ops[op]);
+               unsigned sel = SEL_MASK; // source select for this channel, SEL_MASK unless overridden below
 
-               // FIXME extract this loop into a separate method and reuse it
+               value *v = src.src[arg_start + chan]; // operand of src that feeds this channel
 
-               int reg = -1;
+               if (!v || v->is_undef()) { // missing or undefined operand keeps SEL_MASK
+                       sel = SEL_MASK;
+               } else if (v->is_const()) { // only the literals 0 and 1.0f are accepted; any other constant aborts
+                       literal l = v->literal_value;
+                       if (l == literal(0))
+                               sel = SEL_0;
+                       else if (l == literal(1.0f))
+                               sel = SEL_1;
+                       else {
+                               sblog << "invalid fetch constant operand  " << chan << " ";
+                               dump::dump_op(&src);
+                               sblog << "\n";
+                               abort();
+                       }
 
-               arg_start += 4;
+               } else if (v->is_any_gpr()) { // register operand: the select becomes the operand's channel
+                       unsigned vreg = v->gpr.sel();
+                       unsigned vchan = v->gpr.chan();
 
-               for (unsigned chan = 0; chan < 4; ++chan) {
+                       if (reg == -1) // first register operand decides the source GPR
+                               reg = vreg;
+                       else if ((unsigned)reg != vreg) { // a later operand from a different GPR is logged and aborts
+                               sblog << "invalid fetch source operand  " << chan << " ";
+                               dump::dump_op(&src);
+                               sblog << "\n";
+                               abort();
+                       }
 
-                       n->bc.dst_sel[chan] = SEL_MASK;
+                       sel = vchan;
 
-                       unsigned sel = SEL_MASK;
+               } else { // any other value kind is logged and aborts
+                       sblog << "invalid fetch source operand  " << chan << " ";
+                       dump::dump_op(&src);
+                       sblog << "\n";
+                       abort();
+               }
 
-                       value *v = f->src[arg_start + chan];
+               dst.bc.src_sel[chan] = sel;
+       }
 
-                       if (!v || v->is_undef()) {
-                               sel = SEL_MASK;
-                       } else if (v->is_const()) {
-                               literal l = v->literal_value;
-                               if (l == literal(0))
-                                       sel = SEL_0;
-                               else if (l == literal(1.0f))
-                                       sel = SEL_1;
-                               else {
-                                       sblog << "invalid fetch constant operand  " << chan << " ";
-                                       dump::dump_op(f);
-                                       sblog << "\n";
-                                       abort();
-                               }
+       if (reg >= 0) // only a register that was actually used is passed to update_ngpr()
+               update_ngpr(reg);
 
-                       } else if (v->is_any_gpr()) {
-                               unsigned vreg = v->gpr.sel();
-                               unsigned vchan = v->gpr.chan();
+       dst.bc.src_gpr = reg >= 0 ? reg : 0; // source GPR is 0 when no operand was a register
+}
 
-                               if (reg == -1)
-                                       reg = vreg;
-                               else if ((unsigned)reg != vreg) {
-                                       sblog << "invalid fetch source operand  " << chan << " ";
-                                       dump::dump_op(f);
-                                       sblog << "\n";
-                                       abort();
-                               }
+void bc_finalizer::emit_set_grad(fetch_node* f) { // Inserts two SET_GRADIENTS fetch instructions ahead of f.
 
-                               sel = vchan;
+       assert(f->src.size() == 12 || f->src.size() == 13); // asserted: f carries 12 or 13 source operands
+       unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; // one new instruction per opcode, V first, then H
 
-                       } else {
-                               sblog << "invalid fetch source operand  " << chan << " ";
-                               dump::dump_op(f);
-                               sblog << "\n";
-                               abort();
-                       }
+       unsigned arg_start = 0; // index of the first operand of f copied into the next instruction
 
-                       n->bc.src_sel[chan] = sel;
-               }
+       for (unsigned op = 0; op < 2; ++op) {
+               fetch_node *n = sh.create_fetch();
+               n->bc.set_op(ops[op]);
 
-               if (reg >= 0)
-                       update_ngpr(reg);
+               arg_start += 4; // advanced before use: the V instruction reads operands 4..7, the H instruction 8..11
 
-               n->bc.src_gpr = reg >= 0 ? reg : 0;
+               copy_fetch_src(*n, *f, arg_start); // fills n's source selects and source GPR from those four operands
 
                f->insert_before(n);
        }
 
 }
 
+void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
+       assert(f.src.size() == 8); // asserted: f carries exactly 8 source operands
+       // Creates a separate SET_TEXTURE_OFFSETS fetch instruction and inserts it ahead of f.
+       fetch_node *n = sh.create_fetch();
+
+       n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
+       // Operands 4..7 of f supply the source selects and source GPR of the new instruction.
+       copy_fetch_src(*n, f, 4);
+       // Link the new instruction in immediately before f.
+       f.insert_before(n);
+}
+
 void bc_finalizer::finalize_fetch(fetch_node* f) {
 
        int reg = -1;
@@ -481,8 +558,12 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
 
        if (flags & FF_VTX) {
                src_count = 1;
+       } else if (flags & FF_GDS) {
+               src_count = 2;
        } else if (flags & FF_USEGRAD) {
                emit_set_grad(f);
+       } else if (flags & FF_USE_TEXTURE_OFFSETS) {
+               emit_set_texture_offsets(*f);
        }
 
        for (unsigned chan = 0; chan < src_count; ++chan) {
@@ -583,6 +664,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
        for (unsigned i = 0; i < 4; ++i)
                f->bc.dst_sel[i] = dst_swz[i];
 
+       if ((flags & FF_GDS) && reg == -1) {
+               f->bc.dst_sel[0] = SEL_MASK;
+               f->bc.dst_gpr = 0;
+               return ;
+       }
        assert(reg >= 0);
 
        if (reg >= 0)
@@ -687,15 +773,13 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                        mask |= (1 << chan);
                }
 
-               assert(reg >= 0 && mask);
-
                if (reg >= 0)
                        update_ngpr(reg);
 
                c->bc.rw_gpr = reg >= 0 ? reg : 0;
                c->bc.comp_mask = mask;
 
-               if ((flags & CF_RAT) && (c->bc.type & 1)) {
+               if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
 
                        reg = -1;
 
@@ -734,8 +818,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 }
 
 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
-       unsigned sel = v->select.sel();
-       unsigned bank = sel >> 12;
+       unsigned sel = v->select.kcache_sel();
+       unsigned bank = v->select.kcache_bank();
        unsigned chan = v->select.chan();
        static const unsigned kc_base[] = {128, 160, 256, 288};
 
@@ -814,6 +898,8 @@ unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                if (has_non_wqm_push)
                        ++stack_elements;
                break;
+       case HW_CLASS_UNKNOWN:
+               assert(0);
        }
        return stack_elements;
 }
@@ -855,6 +941,11 @@ void bc_finalizer::cf_peephole() {
                cf_node *c = static_cast<cf_node*>(*I);
 
                if (c->jump_after_target) {
+                       if (c->jump_target->next == NULL) {
+                               c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
+                               if (last_cf == c->jump_target)
+                                       last_cf = static_cast<cf_node*>(c->jump_target->next);
+                       }
                        c->jump_target = static_cast<cf_node*>(c->jump_target->next);
                        c->jump_after_target = false;
                }