glsl: Add lowering pass for ir_unop_bitfield_reverse
authorIan Romanick <ian.d.romanick@intel.com>
Fri, 24 Jun 2016 07:11:26 +0000 (00:11 -0700)
committerIan Romanick <ian.d.romanick@intel.com>
Tue, 19 Jul 2016 19:19:28 +0000 (12:19 -0700)
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/compiler/glsl/ir_optimization.h
src/compiler/glsl/lower_instructions.cpp

index 6fda9f62b71d83e54727a552c9712d1c3f7d2155..77c1260e689c989e00e49d9e74cd80368759a78b 100644 (file)
@@ -45,6 +45,7 @@
 #define BIT_COUNT_TO_MATH         0x02000
 #define EXTRACT_TO_SHIFTS         0x04000
 #define INSERT_TO_SHIFTS          0x08000
+#define REVERSE_TO_SHIFTS         0x10000
 
 /**
  * \see class lower_packing_builtins_visitor
index 902294397ab41e7f37810abd2eff8400f1821cbe..86af49d00b05d5a592dd22879a3ba9671f52b29f 100644 (file)
@@ -162,6 +162,7 @@ private:
    void bit_count_to_math(ir_expression *);
    void extract_to_shifts(ir_expression *);
    void insert_to_shifts(ir_expression *);
+   void reverse_to_shifts(ir_expression *ir);
 };
 
 } /* anonymous namespace */
@@ -1143,6 +1144,92 @@ lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
+{
+   /* For more details, see:
+    *
+    * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+    */
+   ir_constant *c1 =
+      new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
+   ir_constant *c2 =
+      new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements);
+   ir_constant *c4 =
+      new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements);
+   ir_constant *c8 =
+      new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements);
+   ir_constant *c16 =
+      new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements);
+   ir_constant *c33333333 =
+      new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements);
+   ir_constant *c55555555 =
+      new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements);
+   ir_constant *c0F0F0F0F =
+      new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements);
+   ir_constant *c00FF00FF =
+      new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements);
+   ir_variable *temp =
+      new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements),
+                          "temp", ir_var_temporary);
+   ir_instruction &i = *base_ir;
+
+   i.insert_before(temp);
+
+   if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+      i.insert_before(assign(temp, ir->operands[0]));
+   } else {
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      i.insert_before(assign(temp, i2u(ir->operands[0])));
+   }
+
+   /* Swap odd and even bits.
+    *
+    * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1);
+    */
+   i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555),
+                                       lshift(bit_and(temp, c55555555->clone(ir, NULL)),
+                                              c1->clone(ir, NULL)))));
+   /* Swap consecutive pairs.
+    *
+    * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2);
+    */
+   i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333),
+                                       lshift(bit_and(temp, c33333333->clone(ir, NULL)),
+                                              c2->clone(ir, NULL)))));
+
+   /* Swap nibbles.
+    *
+    * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4);
+    */
+   i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F),
+                                       lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)),
+                                              c4->clone(ir, NULL)))));
+
+   /* The last step is, basically, bswap.  Swap the bytes, then swap the
+    * words.  When this code is run through GCC on x86, it does generate a
+    * bswap instruction.
+    *
+    * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8);
+    * temp = ( temp >> 16              ) | ( temp                << 16);
+    */
+   i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF),
+                                       lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)),
+                                              c8->clone(ir, NULL)))));
+
+   if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
+      ir->operation = ir_binop_bit_or;
+      ir->operands[0] = rshift(temp, c16);
+      ir->operands[1] = lshift(temp, c16->clone(ir, NULL));
+   } else {
+      ir->operation = ir_unop_u2i;
+      ir->operands[0] = bit_or(rshift(temp, c16),
+                               lshift(temp, c16->clone(ir, NULL)));
+   }
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1260,6 +1347,10 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          insert_to_shifts(ir);
       break;
 
+   case ir_unop_bitfield_reverse:
+      if (lowering(REVERSE_TO_SHIFTS))
+         reverse_to_shifts(ir);
+      break;
 
    default:
       return visit_continue;