Merge branch 'master' of ../mesa into vulkan
diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
index c883e5e5a67f4df8085aac76fc58a79ad889f488..522ff9d956e251ea6e8ed7f298a9fec55aaec7d8 100644
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
 #define FBC_DUMP(q)
 #endif
 
-#include <iostream>
-
 #include "sb_bc.h"
-
 #include "sb_shader.h"
-
 #include "sb_pass.h"
 
 namespace r600_sb {
 
-using std::cerr;
+void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
+
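+       // insert a standalone NOP ALU group immediately before 'b4'; used when
+       // 'b4' has a relative-addressing dependency on the previous group
+       // (see finalize_alu_group / finalize_alu_src)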
+       alu_group_node *g = sh.create_alu_group();
+       alu_node *a = sh.create_alu();
+
+       a->bc.set_op(ALU_OP0_NOP);
+       a->bc.last = 1;
+
+       g->push_back(a);
+       b4->insert_before(g);
+}
 
 int bc_finalizer::run() {
 
-       regions_vec &rv = sh.get_regions();
+       run_on(sh.root);
 
+       regions_vec &rv = sh.get_regions();
        for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
                        ++I) {
                region_node *r = *I;
@@ -64,13 +71,11 @@ int bc_finalizer::run() {
                r->expand();
        }
 
-       run_on(sh.root);
-
        cf_peephole();
 
        // workaround for some problems on r6xx/7xx
-       // add ALU NOP to each vertex shader
+       // add ALU NOP to each vertex/export shader
-       if (!ctx.is_egcm() && sh.target == TARGET_VS) {
+       if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
                cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
 
                alu_group_node *g = sh.create_alu_group();
@@ -90,14 +95,18 @@ int bc_finalizer::run() {
                last_cf = c;
        }
 
-       if (last_cf->bc.op_ptr->flags & CF_ALU) {
+       if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
                last_cf = sh.create_cf(CF_OP_NOP);
                sh.root->push_back(last_cf);
        }
 
-       if (ctx.is_cayman())
-               last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
-       else
+       if (ctx.is_cayman()) {
+               if (!last_cf) {
+                       cf_node *c = sh.create_cf(CF_OP_CF_END);
+                       sh.root->push_back(c);
+               } else
+                       last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
+       } else
                last_cf->bc.end_of_program = 1;
 
        for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
@@ -113,9 +122,19 @@ int bc_finalizer::run() {
 
 void bc_finalizer::finalize_loop(region_node* r) {
 
+       update_nstack(r);
+
        cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
        cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);
 
+       // Update last_cf, but don't overwrite it if it's outside the current loop nest since
+       // it may point to a cf that is later in program order.
+       // The single parent level check is sufficient since finalize_loop() is processed in
+       // reverse order from innermost to outermost loop nest level.
+       if (!last_cf || last_cf->get_parent_region() == r) {
+               last_cf = loop_end;
+       }
+
        loop_start->jump_after(loop_end);
        loop_end->jump_after(loop_start);
 
@@ -180,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
                cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
                cf_node *if_pop = sh.create_cf(CF_OP_POP);
 
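+               // same rule as in finalize_loop(): only take the new POP as last_cf
+               // while we are still inside the region currently being finalized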
+               if (!last_cf || last_cf->get_parent_region() == r) {
+                       last_cf = if_pop;
+               }
                if_pop->bc.pop_count = 1;
                if_pop->jump_after(if_pop);
 
@@ -212,43 +234,66 @@ void bc_finalizer::finalize_if(region_node* r) {
 }
 
 void bc_finalizer::run_on(container_node* c) {
-
+       node *prev_node = NULL;
        for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                node *n = *I;
 
                if (n->is_alu_group()) {
-                       finalize_alu_group(static_cast<alu_group_node*>(n));
+                       finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
                } else {
-                       if (n->is_fetch_inst()) {
+                       if (n->is_alu_clause()) {
+                               cf_node *c = static_cast<cf_node*>(n);
+
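+                               // EG/CM stack workarounds: flag ALU_PUSH_BEFORE clauses that
+                               // hit the problematic stack configurations; cf_peephole()
+                               // later splits flagged clauses into PUSH + plain ALU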
+                               if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
+                                       if (ctx.stack_workaround_8xx) {
+                                               region_node *r = c->get_parent_region();
+                                               if (r) {
+                                                       unsigned ifs, loops;
+                                                       unsigned elems = get_stack_depth(r, loops, ifs);
+                                                       unsigned dmod1 = elems % ctx.stack_entry_size;
+                                                       unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;
+
+                                                       if (elems && (!dmod1 || !dmod2))
+                                                               c->flags |= NF_ALU_STACK_WORKAROUND;
+                                               }
+                                       } else if (ctx.stack_workaround_9xx) {
+                                               region_node *r = c->get_parent_region();
+                                               if (r) {
+                                                       unsigned ifs, loops;
+                                                       get_stack_depth(r, loops, ifs);
+                                                       if (loops >= 2)
+                                                               c->flags |= NF_ALU_STACK_WORKAROUND;
+                                               }
+                                       }
+                               }
+                       } else if (n->is_fetch_inst()) {
                                finalize_fetch(static_cast<fetch_node*>(n));
                        } else if (n->is_cf_inst()) {
                                finalize_cf(static_cast<cf_node*>(n));
-                       } else if (n->is_alu_clause()) {
-
-                       } else if (n->is_fetch_clause()) {
-
-                       } else {
-                               assert(!"unexpected node");
                        }
-
                        if (n->is_container())
                                run_on(static_cast<container_node*>(n));
                }
+               prev_node = n;
        }
 }
 
-void bc_finalizer::finalize_alu_group(alu_group_node* g) {
+void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {
 
        alu_node *last = NULL;
+       alu_group_node *prev_g = NULL;
+       bool add_nop = false;
+       if (prev_node && prev_node->is_alu_group()) {
+               prev_g = static_cast<alu_group_node*>(prev_node);
+       }
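+       // finalize_alu_src() reports whether this group has a relative-addressing
+       // dependency on prev_g; if so, a NOP group is inserted below (rv6xx only)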
 
        for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
                alu_node *n = static_cast<alu_node*>(*I);
                unsigned slot = n->bc.slot;
-
                value *d = n->dst.empty() ? NULL : n->dst[0];
 
                if (d && d->is_special_reg()) {
-                       assert(n->bc.op_ptr->flags & AF_MOVA);
+                       assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
                        d = NULL;
                }
 
@@ -282,21 +327,26 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) {
 
                update_ngpr(n->bc.dst_gpr);
 
-               finalize_alu_src(g, n);
+               add_nop |= finalize_alu_src(g, n, prev_g);
 
                last = n;
        }
 
+       if (add_nop) {
+               if (sh.get_ctx().r6xx_gpr_index_workaround) {
+                       insert_rv6xx_load_ar_workaround(g);
+               }
+       }
        last->bc.last = 1;
 }
 
-void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
+bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
        vvec &sv = a->src;
-
+       bool add_nop = false;
        FBC_DUMP(
-               cerr << "finalize_alu_src: ";
+               sblog << "finalize_alu_src: ";
                dump::dump_op(a);
-               cerr << "\n";
+               sblog << "\n";
        );
 
        unsigned si = 0;
@@ -319,6 +369,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        if (!v->rel->is_const()) {
                                src.rel = 1;
                                update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
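+                               // relative read: request a NOP group if the previous group
+                               // wrote the base GPR of this relatively-addressed source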
+                               if (prev && !add_nop) {
+                                       for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+                                               alu_node *pn = static_cast<alu_node*>(*pI);
+                                               if (pn->bc.dst_gpr == src.sel) {
+                                                       add_nop = true;
+                                                       break;
+                                               }
+                                       }
+                               }
                        } else
                                src.rel = 0;
 
@@ -376,88 +435,115 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
                        assert(!"unknown value kind");
                        break;
                }
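+               // also request a NOP group if the previous group did a relative
+               // (dst_rel) write to the GPR read by this source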
+               if (prev && !add_nop) {
+                       for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
+                               alu_node *pn = static_cast<alu_node*>(*pI);
+                               if (pn->bc.dst_rel) {
+                                       if (pn->bc.dst_gpr == src.sel) {
+                                               add_nop = true;
+                                               break;
+                                       }
+                               }
+                       }
+               }
        }
 
        while (si < 3) {
                a->bc.src[si++].sel = 0;
        }
+       return add_nop;
 }
 
-void bc_finalizer::emit_set_grad(fetch_node* f) {
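+// Translate four source values (src.src[arg_start..arg_start+3]) into the
+// src_sel/src_gpr fields of 'dst': undef -> SEL_MASK, constants 0/1.0 ->
+// SEL_0/SEL_1, GPR operands -> their channel; all GPR operands must live
+// in the same register.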
+void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
+{
+       int reg = -1;
 
-       assert(f->src.size() == 12);
-       unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
+       for (unsigned chan = 0; chan < 4; ++chan) {
 
-       unsigned arg_start = 0;
+               dst.bc.dst_sel[chan] = SEL_MASK;
 
-       for (unsigned op = 0; op < 2; ++op) {
-               fetch_node *n = sh.create_fetch();
-               n->bc.set_op(ops[op]);
+               unsigned sel = SEL_MASK;
 
-               // FIXME extract this loop into a separate method and reuse it
+               value *v = src.src[arg_start + chan];
 
-               int reg = -1;
+               if (!v || v->is_undef()) {
+                       sel = SEL_MASK;
+               } else if (v->is_const()) {
+                       literal l = v->literal_value;
+                       if (l == literal(0))
+                               sel = SEL_0;
+                       else if (l == literal(1.0f))
+                               sel = SEL_1;
+                       else {
+                               sblog << "invalid fetch constant operand  " << chan << " ";
+                               dump::dump_op(&src);
+                               sblog << "\n";
+                               abort();
+                       }
 
-               arg_start += 4;
+               } else if (v->is_any_gpr()) {
+                       unsigned vreg = v->gpr.sel();
+                       unsigned vchan = v->gpr.chan();
 
-               for (unsigned chan = 0; chan < 4; ++chan) {
+                       if (reg == -1)
+                               reg = vreg;
+                       else if ((unsigned)reg != vreg) {
+                               sblog << "invalid fetch source operand  " << chan << " ";
+                               dump::dump_op(&src);
+                               sblog << "\n";
+                               abort();
+                       }
 
-                       n->bc.dst_sel[chan] = SEL_MASK;
+                       sel = vchan;
 
-                       unsigned sel = SEL_MASK;
+               } else {
+                       sblog << "invalid fetch source operand  " << chan << " ";
+                       dump::dump_op(&src);
+                       sblog << "\n";
+                       abort();
+               }
 
-                       value *v = f->src[arg_start + chan];
+               dst.bc.src_sel[chan] = sel;
+       }
 
-                       if (!v || v->is_undef()) {
-                               sel = SEL_MASK;
-                       } else if (v->is_const()) {
-                               literal l = v->literal_value;
-                               if (l == literal(0))
-                                       sel = SEL_0;
-                               else if (l == literal(1.0f))
-                                       sel = SEL_1;
-                               else {
-                                       cerr << "invalid fetch constant operand  " << chan << " ";
-                                       dump::dump_op(f);
-                                       cerr << "\n";
-                                       abort();
-                               }
+       if (reg >= 0)
+               update_ngpr(reg);
 
-                       } else if (v->is_any_gpr()) {
-                               unsigned vreg = v->gpr.sel();
-                               unsigned vchan = v->gpr.chan();
+       dst.bc.src_gpr = reg >= 0 ? reg : 0;
+}
 
-                               if (reg == -1)
-                                       reg = vreg;
-                               else if ((unsigned)reg != vreg) {
-                                       cerr << "invalid fetch source operand  " << chan << " ";
-                                       dump::dump_op(f);
-                                       cerr << "\n";
-                                       abort();
-                               }
+void bc_finalizer::emit_set_grad(fetch_node* f) {
 
-                               sel = vchan;
+       assert(f->src.size() == 12);
+       unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
 
-                       } else {
-                               cerr << "invalid fetch source operand  " << chan << " ";
-                               dump::dump_op(f);
-                               cerr << "\n";
-                               abort();
-                       }
+       unsigned arg_start = 0;
 
-                       n->bc.src_sel[chan] = sel;
-               }
+       for (unsigned op = 0; op < 2; ++op) {
+               fetch_node *n = sh.create_fetch();
+               n->bc.set_op(ops[op]);
 
-               if (reg >= 0)
-                       update_ngpr(reg);
+               arg_start += 4;
 
-               n->bc.src_gpr = reg >= 0 ? reg : 0;
+               copy_fetch_src(*n, *f, arg_start);
 
                f->insert_before(n);
        }
 
 }
 
+void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
+       assert(f.src.size() == 8);
+
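+       // the texture offsets occupy source slots 4..7; emit a
+       // SET_TEXTURE_OFFSETS fetch right before the sampling instruction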
+       fetch_node *n = sh.create_fetch();
+
+       n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);
+
+       copy_fetch_src(*n, f, 4);
+
+       f.insert_before(n);
+}
+
 void bc_finalizer::finalize_fetch(fetch_node* f) {
 
        int reg = -1;
@@ -472,6 +558,8 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
                src_count = 1;
        } else if (flags & FF_USEGRAD) {
                emit_set_grad(f);
+       } else if (flags & FF_USE_TEXTURE_OFFSETS) {
+               emit_set_texture_offsets(*f);
        }
 
        for (unsigned chan = 0; chan < src_count; ++chan) {
@@ -492,9 +580,9 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
                        else if (l == literal(1.0f))
                                sel = SEL_1;
                        else {
-                               cerr << "invalid fetch constant operand  " << chan << " ";
+                               sblog << "invalid fetch constant operand  " << chan << " ";
                                dump::dump_op(f);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
 
@@ -505,18 +593,18 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
                        if (reg == -1)
                                reg = vreg;
                        else if ((unsigned)reg != vreg) {
-                               cerr << "invalid fetch source operand  " << chan << " ";
+                               sblog << "invalid fetch source operand  " << chan << " ";
                                dump::dump_op(f);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
 
                        sel = vchan;
 
                } else {
-                       cerr << "invalid fetch source operand  " << chan << " ";
+                       sblog << "invalid fetch source operand  " << chan << " ";
                        dump::dump_op(f);
-                       cerr << "\n";
+                       sblog << "\n";
                        abort();
                }
 
@@ -552,18 +640,18 @@ void bc_finalizer::finalize_fetch(fetch_node* f) {
                        if (reg == -1)
                                reg = vreg;
                        else if ((unsigned)reg != vreg) {
-                               cerr << "invalid fetch dst operand  " << chan << " ";
+                               sblog << "invalid fetch dst operand  " << chan << " ";
                                dump::dump_op(f);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
 
                        dst_swz[vchan] = sel;
 
                } else {
-                       cerr << "invalid fetch dst operand  " << chan << " ";
+                       sblog << "invalid fetch dst operand  " << chan << " ";
                        dump::dump_op(f);
-                       cerr << "\n";
+                       sblog << "\n";
                        abort();
                }
 
@@ -584,10 +672,6 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 
        unsigned flags = c->bc.op_ptr->flags;
 
-       if (flags & CF_CALL) {
-               update_nstack(c->get_parent_region(), ctx.is_cayman() ? 1 : 2);
-       }
-
        c->bc.end_of_program = 0;
        last_cf = c;
 
@@ -615,9 +699,9 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                                else if (l == literal(1.0f))
                                        sel = SEL_1;
                                else {
-                                       cerr << "invalid export constant operand  " << chan << " ";
+                                       sblog << "invalid export constant operand  " << chan << " ";
                                        dump::dump_op(c);
-                                       cerr << "\n";
+                                       sblog << "\n";
                                        abort();
                                }
 
@@ -628,18 +712,18 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                                if (reg == -1)
                                        reg = vreg;
                                else if ((unsigned)reg != vreg) {
-                                       cerr << "invalid export source operand  " << chan << " ";
+                                       sblog << "invalid export source operand  " << chan << " ";
                                        dump::dump_op(c);
-                                       cerr << "\n";
+                                       sblog << "\n";
                                        abort();
                                }
 
                                sel = vchan;
 
                        } else {
-                               cerr << "invalid export source operand  " << chan << " ";
+                               sblog << "invalid export source operand  " << chan << " ";
                                dump::dump_op(c);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
 
@@ -662,33 +746,31 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                                continue;
 
                        if (!v->is_any_gpr() || v->gpr.chan() != chan) {
-                               cerr << "invalid source operand  " << chan << " ";
+                               sblog << "invalid source operand  " << chan << " ";
                                dump::dump_op(c);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
                        unsigned vreg = v->gpr.sel();
                        if (reg == -1)
                                reg = vreg;
                        else if ((unsigned)reg != vreg) {
-                               cerr << "invalid source operand  " << chan << " ";
+                               sblog << "invalid source operand  " << chan << " ";
                                dump::dump_op(c);
-                               cerr << "\n";
+                               sblog << "\n";
                                abort();
                        }
 
                        mask |= (1 << chan);
                }
 
-               assert(reg >= 0 && mask);
-
                if (reg >= 0)
                        update_ngpr(reg);
 
                c->bc.rw_gpr = reg >= 0 ? reg : 0;
                c->bc.comp_mask = mask;
 
-               if ((flags & CF_RAT) && (c->bc.type & 1)) {
+               if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {
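+                       // indexed export: validate the index operands below and
+                       // derive index_gpr from them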
 
                        reg = -1;
 
@@ -698,18 +780,18 @@ void bc_finalizer::finalize_cf(cf_node* c) {
                                        continue;
 
                                if (!v->is_any_gpr() || v->gpr.chan() != chan) {
-                                       cerr << "invalid source operand  " << chan << " ";
+                                       sblog << "invalid source operand  " << chan << " ";
                                        dump::dump_op(c);
-                                       cerr << "\n";
+                                       sblog << "\n";
                                        abort();
                                }
                                unsigned vreg = v->gpr.sel();
                                if (reg == -1)
                                        reg = vreg;
                                else if ((unsigned)reg != vreg) {
-                                       cerr << "invalid source operand  " << chan << " ";
+                                       sblog << "invalid source operand  " << chan << " ";
                                        dump::dump_op(c);
-                                       cerr << "\n";
+                                       sblog << "\n";
                                        abort();
                                }
                        }
@@ -721,17 +803,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
 
                        c->bc.index_gpr = reg >= 0 ? reg : 0;
                }
-
-
-
-       } else {
-
-#if 0
-               if ((flags & (CF_BRANCH | CF_LOOP)) && !sh.uses_gradients) {
-                       c->bc.valid_pixel_mode = 1;
-               }
-#endif
-
+       } else if (flags & CF_CALL) {
+               update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
        }
 }
 
@@ -769,37 +842,88 @@ void bc_finalizer::update_ngpr(unsigned gpr) {
                ngpr = gpr + 1;
 }
 
-void bc_finalizer::update_nstack(region_node* r, unsigned add) {
-       unsigned loops = 0;
-       unsigned ifs = 0;
+unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
+                                           unsigned &ifs, unsigned add) {
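+       // Count the stack elements needed at node 'n': each enclosing loop costs
+       // ctx.stack_entry_size elements, each if costs one, plus 'add' extra
+       // pushes and the hardware-specific reservations handled below.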
+       unsigned stack_elements = add;
+       bool has_non_wqm_push = (add != 0);
+       region_node *r = n->is_region() ?
+                       static_cast<region_node*>(n) : n->get_parent_region();
+
+       loops = 0;
+       ifs = 0;
 
        while (r) {
-               if (r->is_loop())
+               if (r->is_loop()) {
                        ++loops;
-               else
+               } else {
                        ++ifs;
-
+                       has_non_wqm_push = true;
+               }
                r = r->get_parent_region();
        }
-
-       unsigned stack_elements = (loops * ctx.stack_entry_size) + ifs + add;
-
-       // FIXME calculate more precisely
-       if (ctx.is_evergreen()) {
-               ++stack_elements;
-       } else {
-               stack_elements += 2;
-               if (ctx.is_cayman())
+       stack_elements += (loops * ctx.stack_entry_size) + ifs;
+
+       // reserve additional elements in some cases
+       switch (ctx.hw_class) {
+       case HW_CLASS_R600:
+       case HW_CLASS_R700:
+               // If any non-WQM push is invoked, 2 elements should be reserved.
+               if (has_non_wqm_push)
+                       stack_elements += 2;
+               break;
+       case HW_CLASS_CAYMAN:
+               // If any stack operation is invoked, 2 elements should be reserved
+               if (stack_elements)
+                       stack_elements += 2;
+               break;
+       case HW_CLASS_EVERGREEN:
+               // According to the docs we need to reserve 1 element for each of the
+               // following cases:
+               //   1) non-WQM push is used with WQM/LOOP frames on stack
+               //   2) ALU_ELSE_AFTER is used at the point of max stack usage
+               // NOTE:
+               // It was found that the conditions above are not sufficient, there are
+               // other cases where we also need to reserve stack space, that's why
+               // we always reserve 1 stack element if we have non-WQM push on stack.
+               // Condition 2 is ignored for now because we don't use this instruction.
+               if (has_non_wqm_push)
                        ++stack_elements;
+               break;
+       case HW_CLASS_UNKNOWN:
+               assert(0);
        }
+       return stack_elements;
+}
+
+void bc_finalizer::update_nstack(region_node* r, unsigned add) {
+       unsigned loops = 0;
+       unsigned ifs = 0;
+       unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;
 
-       unsigned stack_entries = (stack_elements + 3) >> 2;
+       // XXX all chips expect this value to be computed using 4 as entry size,
+       // not the real entry size
+       unsigned stack_entries = (elems + 3) >> 2;
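+       // e.g. elems == 5  =>  stack_entries == (5 + 3) >> 2 == 2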
 
        if (nstack < stack_entries)
                nstack = stack_entries;
 }
 
 void bc_finalizer::cf_peephole() {
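+       // stack workarounds: replace each ALU_PUSH_BEFORE clause flagged in
+       // run_on() with an explicit PUSH followed by a plain ALU clause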
+       if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
+               for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
+                               I = N) {
+                       N = I; ++N;
+                       cf_node *c = static_cast<cf_node*>(*I);
+
+                       if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
+                                       (c->flags & NF_ALU_STACK_WORKAROUND)) {
+                               cf_node *push = sh.create_cf(CF_OP_PUSH);
+                               c->insert_before(push);
+                               push->jump(c);
+                               c->bc.set_op(CF_OP_ALU);
+                       }
+               }
+       }
 
        for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
                        I = N) {