panfrost/midgard: Optimize csel involving 0
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 25 Apr 2019 03:48:08 +0000 (03:48 +0000)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 25 Apr 2019 20:37:45 +0000 (20:37 +0000)
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/midgard/helpers.h
src/gallium/drivers/panfrost/midgard/midgard_compile.c

index 5912963ccddf392d96bcc607c1a88bc28a60ede3..116c69b2c14f27630451a7c2f1e53ace42bfb662 100644 (file)
@@ -217,6 +217,7 @@ static struct {
 
         /* XXX: Test case where it's right on smul but not sadd */
         [midgard_alu_op_iand]           = {"iand", UNITS_ADD | OP_COMMUTES}, 
+        [midgard_alu_op_iandnot]         = {"iandnot", UNITS_ADD},
 
         [midgard_alu_op_ior]            = {"ior", UNITS_ADD | OP_COMMUTES},
         [midgard_alu_op_ixor]           = {"ixor", UNITS_ADD | OP_COMMUTES},
@@ -237,7 +238,6 @@ static struct {
         /* These instructions are not yet emitted by the compiler, so
          * don't speculate about units yet */ 
         [midgard_alu_op_ishladd]        = {"ishladd", 0},
-        [midgard_alu_op_iandnot]        = {"iandnot", 0},
 
         [midgard_alu_op_uball_lt]       = {"uball_lt", 0},
         [midgard_alu_op_uball_lte]      = {"uball_lte", 0},
index 5b5a44013a2c78ee504e1b52313e8090514d24ea..b5ab103298e147984755c8ef0aa67af8d7abbd70 100644 (file)
@@ -1227,29 +1227,44 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 break;
         }
 
+        /* For a few special csel cases not handled by NIR, we can use a
+         * bitwise op. Otherwise, we emit the condition and do a real csel */
+
         case nir_op_b32csel: {
-                op = midgard_alu_op_fcsel;
+                if (nir_is_fzero_constant(instr->src[2].src)) {
+                        /* (b ? v : 0) = (b & v) */
+                        op = midgard_alu_op_iand;
+                        nr_inputs = 2;
+                } else if (nir_is_fzero_constant(instr->src[1].src)) {
+                        /* (b ? 0 : v) = (!b ? v : 0) = (~b & v) = (v & ~b) */
+                        op = midgard_alu_op_iandnot;
+                        nr_inputs = 2;
+                        instr->src[1] = instr->src[0];
+                        instr->src[0] = instr->src[2];
+                } else {
+                        op = midgard_alu_op_fcsel;
 
-                /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */
-                nr_inputs = 2;
+                        /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */
+                        nr_inputs = 2;
 
-                /* Figure out which component the condition is in */
+                        /* Figure out which component the condition is in */
 
-                unsigned comp = instr->src[0].swizzle[0];
+                        unsigned comp = instr->src[0].swizzle[0];
 
-                /* Make sure NIR isn't throwing a mixed condition at us */
+                        /* Make sure NIR isn't throwing a mixed condition at us */
 
-                for (unsigned c = 1; c < nr_components; ++c)
-                        assert(instr->src[0].swizzle[c] == comp);
+                        for (unsigned c = 1; c < nr_components; ++c)
+                                assert(instr->src[0].swizzle[c] == comp);
 
-                /* Emit the condition into r31.w */
-                emit_condition(ctx, &instr->src[0].src, false, comp);
+                        /* Emit the condition into r31.w */
+                        emit_condition(ctx, &instr->src[0].src, false, comp);
 
-                /* The condition is the first argument; move the other
-                 * arguments up one to be a binary instruction for
-                 * Midgard */
+                        /* The condition is the first argument; move the other
+                         * arguments up one to be a binary instruction for
+                         * Midgard */
 
-                memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src));
+                        memmove(instr->src, instr->src + 1, 2 * sizeof(nir_alu_src));
+                }
                 break;
         }