r600g/sb: improve optimization of conditional instructions
author: Vadim Girlin <vadimgirlin@gmail.com>
Mon, 27 May 2013 00:00:03 +0000 (04:00 +0400)
committer: Vadim Girlin <vadimgirlin@gmail.com>
Mon, 27 May 2013 11:19:20 +0000 (15:19 +0400)
Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
src/gallium/drivers/r600/sb/sb_core.cpp
src/gallium/drivers/r600/sb/sb_expr.cpp
src/gallium/drivers/r600/sb/sb_expr.h
src/gallium/drivers/r600/sb/sb_if_conversion.cpp
src/gallium/drivers/r600/sb/sb_pass.h
src/gallium/drivers/r600/sb/sb_peephole.cpp

index 034505704fe71b9fd6ca308dbc94412e44dbafeb..5b917ac6e7595af008660669d7d3454fba1dabf1 100644 (file)
@@ -188,9 +188,14 @@ int r600_sb_bytecode_process(struct r600_context *rctx,
 
        sh->set_undef(sh->root->live_before);
 
-       SB_RUN_PASS(peephole,                   1);
        SB_RUN_PASS(if_conversion,              1);
 
+       // if_conversion breaks info about uses, but next pass (peephole)
+       // doesn't need it, so we can skip def/use update here
+       // until it's really required
+       //SB_RUN_PASS(def_use,                  0);
+
+       SB_RUN_PASS(peephole,                   1);
        SB_RUN_PASS(def_use,                    0);
 
        SB_RUN_PASS(gvn,                                1);
index 8582c8e8e0b7f0135f2079c8ef78ce466dda5f5b..65a764153a7231b4fc39c07090cb604399d175fe 100644 (file)
@@ -580,7 +580,7 @@ unsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
        return ncc;
 }
 
-unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
+unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
 
        if (int_dst && cmp_type == AF_FLOAT_CMP) {
                switch (cc) {
@@ -612,6 +612,8 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
                }
                case AF_UINT_CMP: {
                        switch (cc) {
+                       case AF_CC_E: return ALU_OP2_SETE_INT;
+                       case AF_CC_NE: return ALU_OP2_SETNE_INT;
                        case AF_CC_GT: return ALU_OP2_SETGT_UINT;
                        case AF_CC_GE: return ALU_OP2_SETGE_UINT;
                        }
@@ -624,7 +626,7 @@ unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst) {
        return ~0u;
 }
 
-unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
+unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
 
        switch(cmp_type) {
        case AF_FLOAT_CMP: {
@@ -647,6 +649,8 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
        }
        case AF_UINT_CMP: {
                switch (cc) {
+               case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
+               case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
                case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
                case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
                }
@@ -658,6 +662,44 @@ unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type) {
        return ~0u;
 }
 
+unsigned get_killcc_op(unsigned cc, unsigned cmp_type) { // maps (condition code, comparison type) to an ALU_OP2_KILL* opcode
+
+       switch(cmp_type) { // outer switch picks the comparison type; each inner switch picks the condition code
+       case AF_FLOAT_CMP: {
+               switch (cc) {
+               case AF_CC_E: return ALU_OP2_KILLE;
+               case AF_CC_NE: return ALU_OP2_KILLNE;
+               case AF_CC_GT: return ALU_OP2_KILLGT;
+               case AF_CC_GE: return ALU_OP2_KILLGE;
+               }
+               break; // cc not listed for this comparison type: fall out to the assert below
+       }
+       case AF_INT_CMP: {
+               switch (cc) {
+               case AF_CC_E: return ALU_OP2_KILLE_INT;
+               case AF_CC_NE: return ALU_OP2_KILLNE_INT;
+               case AF_CC_GT: return ALU_OP2_KILLGT_INT;
+               case AF_CC_GE: return ALU_OP2_KILLGE_INT;
+               }
+               break;
+       }
+       case AF_UINT_CMP: { // E/NE return the same *_INT opcodes as AF_INT_CMP; only GT/GE have *_UINT variants here
+               switch (cc) {
+               case AF_CC_E: return ALU_OP2_KILLE_INT;
+               case AF_CC_NE: return ALU_OP2_KILLNE_INT;
+               case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
+               case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
+               }
+               break;
+       }
+       }
+
+       assert(!"unexpected cc&cmp_type combination"); // reached only when no case above returned
+       return ~0u; // value returned for an unhandled pair when the assert is compiled out
+}
+
+
+
 void convert_predset_to_set(shader& sh, alu_node* a) {
 
        unsigned flags = a->bc.op_ptr->flags;
@@ -668,7 +710,7 @@ void convert_predset_to_set(shader& sh, alu_node* a) {
 
        cc = invert_setcc_condition(cc, swap_args);
 
-       unsigned newop = get_setcc_opcode(cc, cmp_type, true);
+       unsigned newop = get_setcc_op(cc, cmp_type, true);
 
        a->dst.resize(1);
        a->bc.set_op(newop);
index 032867be5590aef6394c939c682eebb8250fbf4a..89177141a764983bcf21aa5077e54a14b37b147b 100644 (file)
@@ -37,8 +37,9 @@ value* get_select_value_for_em(shader &sh, value *em);
 
 void convert_predset_to_set(shader &sh, alu_node *a);
 unsigned invert_setcc_condition(unsigned cc, bool &swap_args);
-unsigned get_setcc_opcode(unsigned cc, unsigned cmp_type, bool int_dst);
-unsigned get_predsetcc_opcode(unsigned cc, unsigned cmp_type);
+unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst);
+unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type);
+unsigned get_killcc_op(unsigned cc, unsigned cmp_type);
 
 class expr_handler {
 
index 96dd1ee31fe8caccc2050eb5037fdb4e2693a37c..93edacec7af8251fcc98aed6a04e6d1d55168c0d 100644 (file)
@@ -258,7 +258,7 @@ bool if_conversion::run_on(region_node* r) {
                        std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
                }
 
-               unsigned newopcode = get_predsetcc_opcode(cc, cmptype);
+               unsigned newopcode = get_predsetcc_op(cc, cmptype);
                newpredset->bc.set_op(newopcode);
 
                // move the code from the 'false' branch ('else') to the 'true' branch
index 7e606da822c1445c8eb5d1f00e0451c11fc366f7..c7272ba2680b053032a09df0c755b493221ec9ca 100644 (file)
@@ -413,7 +413,7 @@ public:
 
        void optimize_cc_op(alu_node *a);
 
-       void optimize_SETcc_op(alu_node *a);
+       void optimize_cc_op2(alu_node *a);
        void optimize_CNDcc_op(alu_node *a);
 
        bool get_bool_op_info(value *b, bool_op_info& bop);
index 6373b5c3a610803410899f440a2037187a82fa4c..cb423c9b2247318de980e861aaa33d5bf6919690 100644 (file)
@@ -46,7 +46,7 @@ int peephole::run() {
 
 void peephole::run_on(container_node* c) {
 
-       for (node_riterator I = c->rbegin(), E = c->rend(); I != E; ++I) {
+       for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
                node *n = *I;
 
                if (n->is_container())
@@ -56,7 +56,8 @@ void peephole::run_on(container_node* c) {
                        if (n->is_alu_inst()) {
                                alu_node *a = static_cast<alu_node*>(n);
 
-                               if (a->bc.op_ptr->flags & (AF_PRED | AF_SET | AF_CMOV)) {
+                               if (a->bc.op_ptr->flags &
+                                               (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
                                        optimize_cc_op(a);
                                } else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
 
@@ -73,8 +74,8 @@ void peephole::run_on(container_node* c) {
 void peephole::optimize_cc_op(alu_node* a) {
        unsigned aflags = a->bc.op_ptr->flags;
 
-       if (aflags & (AF_PRED | AF_SET)) {
-               optimize_SETcc_op(a);
+       if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
+               optimize_cc_op2(a);
        } else if (aflags & AF_CMOV) {
                optimize_CNDcc_op(a);
        }
@@ -90,26 +91,37 @@ void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
        f2i->remove();
 }
 
-void peephole::optimize_SETcc_op(alu_node* a) {
+void peephole::optimize_cc_op2(alu_node* a) {
 
        unsigned flags = a->bc.op_ptr->flags;
        unsigned cc = flags & AF_CC_MASK;
+
+       if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
+               return;
+
        unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
        unsigned dst_type = flags & AF_DST_TYPE_MASK;
-       bool is_pred = flags & AF_PRED;
 
-       // TODO handle other cases
+       int op_kind = (flags & AF_PRED) ? 1 :
+                       (flags & AF_SET) ? 2 :
+                       (flags & AF_KILL) ? 3 : 0;
+
+       bool swapped = false;
 
-       if (a->src[1]->is_const() && (cc == AF_CC_E || cc == AF_CC_NE) &&
-                       a->src[1]->literal_value == literal(0) &&
-                       a->bc.src[0].neg == 0 && a->bc.src[0].abs == 0) {
+       if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
+               std::swap(a->src[0],a->src[1]);
+               swapped = true;
+       }
+
+       if (swapped || (a->src[1]->is_const() &&
+                       a->src[1]->literal_value == literal(0))) {
 
                value *s = a->src[0];
 
                bool_op_info bop = {};
 
                PPH_DUMP(
-                       sblog << "optSETcc ";
+                       sblog << "cc_op2: ";
                        dump::dump_op(a);
                        sblog << "\n";
                );
@@ -139,8 +151,23 @@ void peephole::optimize_SETcc_op(alu_node* a) {
                        sblog <<"\n";
                );
 
-               unsigned newop = is_pred ? get_predsetcc_opcode(cc, cmp_type) :
-                               get_setcc_opcode(cc, cmp_type, dst_type != AF_FLOAT_DST);
+               unsigned newop;
+
+               switch(op_kind) {
+               case 1:
+                       newop = get_predsetcc_op(cc, cmp_type);
+                       break;
+               case 2:
+                       newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
+                       break;
+               case 3:
+                       newop = get_killcc_op(cc, cmp_type);
+                       break;
+               default:
+                       newop = ALU_OP0_NOP;
+                       assert(!"invalid op kind");
+                       break;
+               }
 
                a->bc.set_op(newop);