From 63d09a0cb74f75e7e09773d560a22452c107647f Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Mon, 27 May 2013 01:37:54 +0400 Subject: [PATCH] r600g/sb: improve handling of KILL instructions This patch improves handling of unconditional KILL instructions inside the conditional blocks, uncovering more opportunities for if-conversion. Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/sb/sb_expr.h | 7 +- .../drivers/r600/sb/sb_if_conversion.cpp | 214 +++++++++++------- src/gallium/drivers/r600/sb/sb_pass.h | 7 +- 3 files changed, 139 insertions(+), 89 deletions(-) diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h index c7f7dbfe447..032867be559 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.h +++ b/src/gallium/drivers/r600/sb/sb_expr.h @@ -72,12 +72,13 @@ public: bool fold_alu_op2(alu_node &n); bool fold_alu_op3(alu_node &n); - void apply_alu_src_mod(const bc_alu &bc, unsigned src, literal &v); - void apply_alu_dst_mod(const bc_alu &bc, literal &v); + static void apply_alu_src_mod(const bc_alu &bc, unsigned src, literal &v); + static void apply_alu_dst_mod(const bc_alu &bc, literal &v); void assign_source(value *dst, value *src); - bool evaluate_condition(unsigned alu_cnd_flags, literal s1, literal s2); + static bool evaluate_condition(unsigned alu_cnd_flags, literal s1, + literal s2); }; } // namespace r600_sb diff --git a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp index 6411d06bdf6..96dd1ee31fe 100644 --- a/src/gallium/drivers/r600/sb/sb_if_conversion.cpp +++ b/src/gallium/drivers/r600/sb/sb_if_conversion.cpp @@ -56,90 +56,76 @@ int if_conversion::run() { return 0; } -unsigned if_conversion::try_convert_kills(region_node* r) { +void if_conversion::convert_kill_instructions(region_node *r, + value *em, bool branch, + container_node *c) { + value *cnd = NULL; - // handling the simplest (and probably most frequent) case only - - // if - 4 kills - endif + for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) { + N = I + 1; - // TODO handle more complex cases + if (!I->is_alu_inst()) + continue; - depart_node *d1 = static_cast(r->front()); - if (!d1->is_depart()) - return 0; + alu_node *a = static_cast(*I); + unsigned flags = a->bc.op_ptr->flags; - if_node *f = static_cast(d1->front()); - if (!f->is_if()) - return 0; + if (!(flags & AF_KILL)) + continue; - depart_node *d2 = static_cast(f->front()); - if (!d2->is_depart()) - return 0; + // ignore predicated or non-const kill instructions + if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const()) + continue; - unsigned cnt = 0; + literal l0 = a->src[0]->literal_value; + literal l1 = a->src[1]->literal_value; - for (node_iterator I = d2->begin(), E = d2->end(); I != E; ++I) { - alu_node *n = static_cast(*I); - if (!n->is_alu_inst()) - return 0; + expr_handler::apply_alu_src_mod(a->bc, 0, l0); + expr_handler::apply_alu_src_mod(a->bc, 1, l1); - if (!(n->bc.op_ptr->flags & AF_KILL)) - return 0; + if (expr_handler::evaluate_condition(flags, l0, l1)) { + // kill with constant 'true' condition, we'll convert it to the + // conditional kill outside of the if-then-else block - if (n->bc.op_ptr->src_count != 2 || n->src.size() != 2) - return 0; + a->remove(); - value *s1 = n->src[0], *s2 = n->src[1]; + if (!cnd) { + cnd = get_select_value_for_em(sh, em); + } else { + // more than one kill with the same condition, just remove it + continue; + } - // assuming that the KILL with constant operands is "always kill" + r->insert_before(a); + a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT); - if (!s1 || !s2 || !s1->is_const() || !s2->is_const()) - return 0; - - ++cnt; - } - - if (cnt > 4) - return 0; - - value *cond = f->cond; - value *pred = get_select_value_for_em(sh, cond); - - if (!pred) - return 0; - - for (node_iterator N, I = d2->begin(), E = d2->end(); I != E; I = N) { - N = I; ++N; - - alu_node *n = static_cast(*I); - - IFC_DUMP( - sblog << "converting "; - dump::dump_op(n); - sblog << " " << n << "\n"; - ); - - n->remove(); - - n->bc.set_op(ALU_OP2_KILLE_INT); - n->src[0] = pred; - n->src[1] = sh.get_const_value(0); - // reset src modifiers - memset(&n->bc.src[0], 0, sizeof(bc_alu_src)); - memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); - - r->insert_before(n); + a->src[0] = cnd; + a->src[1] = sh.get_const_value(0); + // clear modifiers + memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); + memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); + } else { + // kill with constant 'false' condition, this shouldn't happen + // but remove it anyway + a->remove(); + } } - - return cnt; } +bool if_conversion::check_and_convert(region_node *r) { - -bool if_conversion::run_on(region_node* r) { - - if (r->dep_count() != 2 || r->rep_count() != 1) + depart_node *nd1 = static_cast(r->first); + if (!nd1->is_depart()) + return false; + if_node *nif = static_cast(nd1->first); + if (!nif->is_if()) + return false; + depart_node *nd2 = static_cast(nif->first); + if (!nd2->is_depart()) return false; + value* &em = nif->cond; + node_stats s; r->collect_stats(s); @@ -149,7 +135,7 @@ bool if_conversion::run_on(region_node* r) { s.dump(); ); - if (s.region_count || s.fetch_count || + if (s.region_count || s.fetch_count || s.alu_kill_count || s.if_count != 1 || s.repeat_count) return false; @@ -189,25 +175,8 @@ bool if_conversion::run_on(region_node* r) { if (real_alu_count > 400) return false; - if (s.alu_kill_count) { - unsigned kcnt = try_convert_kills(r); - if (kcnt < s.alu_kill_count) - return false; - } - IFC_DUMP( sblog << "if_cvt: processing...\n"; ); - depart_node *nd1 = static_cast(r->first); - if (!nd1->is_depart()) - return false; - if_node *nif = static_cast(nd1->first); - if (!nif->is_if()) - return false; - depart_node *nd2 = static_cast(nif->first); - if (!nd2->is_depart()) - return false; - - value *em = nif->cond; value *select = get_select_value_for_em(sh, em); if (!select) @@ -230,6 +199,83 @@ bool if_conversion::run_on(region_node* r) { return true; } +bool if_conversion::run_on(region_node* r) { + + if (r->dep_count() != 2 || r->rep_count() != 1) + return false; + + depart_node *nd1 = static_cast(r->first); + if (!nd1->is_depart()) + return false; + if_node *nif = static_cast(nd1->first); + if (!nif->is_if()) + return false; + depart_node *nd2 = static_cast(nif->first); + if (!nd2->is_depart()) + return false; + + value* &em = nif->cond; + + convert_kill_instructions(r, em, true, nd2); + convert_kill_instructions(r, em, false, nd1); + + if (check_and_convert(r)) + return true; + + if (nd2->empty() && nif->next) { + // empty true branch, non-empty false branch + // we'll invert it to get rid of 'else' + + assert(em && em->def); + + alu_node *predset = static_cast(em->def); + + // create clone of PREDSET instruction with inverted condition. + // PREDSET has 3 dst operands in our IR (value written to gpr, + // predicate value and exec mask value), we'll split it such that + // new PREDSET will define exec mask value only, and two others will + // be defined in the old PREDSET (if they are not used then DCE will + // simply remove old PREDSET). + + alu_node *newpredset = sh.clone(predset); + predset->insert_after(newpredset); + + predset->dst[2] = NULL; + + newpredset->dst[0] = NULL; + newpredset->dst[1] = NULL; + + em->def = newpredset; + + unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK; + unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK; + bool swapargs = false; + + cc = invert_setcc_condition(cc, swapargs); + + if (swapargs) { + std::swap(newpredset->src[0], newpredset->src[1]); + std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); + } + + unsigned newopcode = get_predsetcc_opcode(cc, cmptype); + newpredset->bc.set_op(newopcode); + + // move the code from the 'false' branch ('else') to the 'true' branch + nd2->move(nif->next, NULL); + + // swap phi operands + for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; + ++I) { + node *p = *I; + assert(p->src.size() == 2); + std::swap(p->src[0], p->src[1]); + } + } + + return false; +} + alu_node* if_conversion::convert_phi(value* select, node* phi) { assert(phi->dst.size() == 1 || phi->src.size() == 2); diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h index a6338ae0176..7e606da822c 100644 --- a/src/gallium/drivers/r600/sb/sb_pass.h +++ b/src/gallium/drivers/r600/sb/sb_pass.h @@ -338,9 +338,12 @@ public: bool run_on(region_node *r); - alu_node* convert_phi(value *select, node *phi); + void convert_kill_instructions(region_node *r, value *em, bool branch, + container_node *c); + + bool check_and_convert(region_node *r); - unsigned try_convert_kills(region_node* r); + alu_node* convert_phi(value *select, node *phi); }; -- 2.30.2