X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fsb%2Fsb_bc_finalize.cpp;h=2ec4db624a585518fae5b0af54463537031f9f5c;hb=2f2cef385fd0f96f5cca3d5ccc48184bbc681831;hp=3f362c4d787949c4d899cc0e2837cf94e2a74d83;hpb=de0fd375f6de8f3357d05decc4a7dc231c679645;p=mesa.git diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 3f362c4d787..2ec4db624a5 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -38,6 +38,18 @@ namespace r600_sb { +void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) { + + alu_group_node *g = sh.create_alu_group(); + alu_node *a = sh.create_alu(); + + a->bc.set_op(ALU_OP0_NOP); + a->bc.last = 1; + + g->push_back(a); + b4->insert_before(g); +} + int bc_finalizer::run() { run_on(sh.root); @@ -115,6 +127,14 @@ void bc_finalizer::finalize_loop(region_node* r) { cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); + // Update last_cf, but don't overwrite it if it's outside the current loop nest since + // it may point to a cf that is later in program order. + // The single parent level check is sufficient since finalize_loop() is processed in + // reverse order from innermost to outermost loop nest level. + if (!last_cf || last_cf->get_parent_region() == r) { + last_cf = loop_end; + } + loop_start->jump_after(loop_end); loop_end->jump_after(loop_start); @@ -179,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) { cf_node *if_jump = sh.create_cf(CF_OP_JUMP); cf_node *if_pop = sh.create_cf(CF_OP_POP); + if (!last_cf || last_cf->get_parent_region() == r) { + last_cf = if_pop; + } if_pop->bc.pop_count = 1; if_pop->jump_after(if_pop); @@ -211,12 +234,12 @@ void bc_finalizer::finalize_if(region_node* r) { } void bc_finalizer::run_on(container_node* c) { - + node *prev_node = NULL; for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->is_alu_group()) { - finalize_alu_group(static_cast(n)); + finalize_alu_group(static_cast(n), prev_node); } else { if (n->is_alu_clause()) { cf_node *c = static_cast(n); @@ -243,6 +266,7 @@ void bc_finalizer::run_on(container_node* c) { } } } + last_cf = c; } else if (n->is_fetch_inst()) { finalize_fetch(static_cast(n)); } else if (n->is_cf_inst()) { @@ -251,21 +275,26 @@ void bc_finalizer::run_on(container_node* c) { if (n->is_container()) run_on(static_cast(n)); } + prev_node = n; } } -void bc_finalizer::finalize_alu_group(alu_group_node* g) { +void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) { alu_node *last = NULL; + alu_group_node *prev_g = NULL; + bool add_nop = false; + if (prev_node && prev_node->is_alu_group()) { + prev_g = static_cast(prev_node); + } for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { alu_node *n = static_cast(*I); unsigned slot = n->bc.slot; - value *d = n->dst.empty() ? NULL : n->dst[0]; if (d && d->is_special_reg()) { - assert(n->bc.op_ptr->flags & AF_MOVA); + assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit()); d = NULL; } @@ -275,7 +304,8 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) { assert(fdst.chan() == slot || slot == SLOT_TRANS); } - n->bc.dst_gpr = fdst.sel(); + if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman())) + n->bc.dst_gpr = fdst.sel(); n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0; @@ -299,17 +329,22 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g) { update_ngpr(n->bc.dst_gpr); - finalize_alu_src(g, n); + add_nop |= finalize_alu_src(g, n, prev_g); last = n; } + if (add_nop) { + if (sh.get_ctx().r6xx_gpr_index_workaround) { + insert_rv6xx_load_ar_workaround(g); + } + } last->bc.last = 1; } -void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { +bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) { vvec &sv = a->src; - + bool add_nop = false; FBC_DUMP( sblog << "finalize_alu_src: "; dump::dump_op(a); @@ -336,6 +371,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { if (!v->rel->is_const()) { src.rel = 1; update_ngpr(v->array->gpr.sel() + v->array->array_size -1); + if (prev && !add_nop) { + for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { + alu_node *pn = static_cast(*pI); + if (pn->bc.dst_gpr == src.sel) { + add_nop = true; + break; + } + } + } } else src.rel = 0; @@ -393,11 +437,23 @@ void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) { assert(!"unknown value kind"); break; } + if (prev && !add_nop) { + for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { + alu_node *pn = static_cast(*pI); + if (pn->bc.dst_rel) { + if (pn->bc.dst_gpr == src.sel) { + add_nop = true; + break; + } + } + } + } } while (si < 3) { a->bc.src[si++].sel = 0; } + return add_nop; } void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start) @@ -460,7 +516,7 @@ void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg void bc_finalizer::emit_set_grad(fetch_node* f) { - assert(f->src.size() == 12); + assert(f->src.size() == 12 || f->src.size() == 13); unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; unsigned arg_start = 0; @@ -502,6 +558,8 @@ void bc_finalizer::finalize_fetch(fetch_node* f) { if (flags & FF_VTX) { src_count = 1; + } else if (flags & FF_GDS) { + src_count = 2; } else if (flags & FF_USEGRAD) { emit_set_grad(f); } else if (flags & FF_USE_TEXTURE_OFFSETS) { @@ -606,6 +664,11 @@ void bc_finalizer::finalize_fetch(fetch_node* f) { for (unsigned i = 0; i < 4; ++i) f->bc.dst_sel[i] = dst_swz[i]; + if ((flags & FF_GDS) && reg == -1) { + f->bc.dst_sel[0] = SEL_MASK; + f->bc.dst_gpr = 0; + return ; + } assert(reg >= 0); if (reg >= 0) @@ -710,8 +773,6 @@ void bc_finalizer::finalize_cf(cf_node* c) { mask |= (1 << chan); } - assert(reg >= 0 && mask); - if (reg >= 0) update_ngpr(reg); @@ -757,8 +818,8 @@ void bc_finalizer::finalize_cf(cf_node* c) { } sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) { - unsigned sel = v->select.sel(); - unsigned bank = sel >> 12; + unsigned sel = v->select.kcache_sel(); + unsigned bank = v->select.kcache_bank(); unsigned chan = v->select.chan(); static const unsigned kc_base[] = {128, 160, 256, 288}; @@ -880,6 +941,11 @@ void bc_finalizer::cf_peephole() { cf_node *c = static_cast(*I); if (c->jump_after_target) { + if (c->jump_target->next == NULL) { + c->jump_target->insert_after(sh.create_cf(CF_OP_NOP)); + if (last_cf == c->jump_target) + last_cf = static_cast(c->jump_target->next); + } c->jump_target = static_cast(c->jump_target->next); c->jump_after_target = false; }