glsl: Add lowering pass for ir_quadop_bitfield_insert
author Ian Romanick <ian.d.romanick@intel.com>
Fri, 24 Jun 2016 06:16:10 +0000 (23:16 -0700)
committer Ian Romanick <ian.d.romanick@intel.com>
Tue, 19 Jul 2016 19:19:28 +0000 (12:19 -0700)
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/compiler/glsl/ir_optimization.h
src/compiler/glsl/lower_instructions.cpp

index 9550052d6b03fef8bf7cf31b103eb0ebd8cc374d..6fda9f62b71d83e54727a552c9712d1c3f7d2155 100644 (file)
@@ -44,6 +44,7 @@
 #define DFREXP_DLDEXP_TO_ARITH    0x1000
 #define BIT_COUNT_TO_MATH         0x02000
 #define EXTRACT_TO_SHIFTS         0x04000
+#define INSERT_TO_SHIFTS          0x08000 /* lower ir_quadop_bitfield_insert */
 
 /**
  * \see class lower_packing_builtins_visitor
index 0d103d27134ec6a7f44c7c4ef0307d4e86a72269..902294397ab41e7f37810abd2eff8400f1821cbe 100644 (file)
@@ -161,6 +161,7 @@ private:
    void dsign_to_csel(ir_expression *);
    void bit_count_to_math(ir_expression *);
    void extract_to_shifts(ir_expression *);
+   void insert_to_shifts(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -1076,6 +1077,72 @@ lower_instructions_visitor::extract_to_shifts(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
+{  /* Lowers ir_quadop_bitfield_insert in place to shifts and bitwise ops. */
+   ir_constant *c1;
+   ir_constant *c32;
+   ir_constant *cFFFFFFFF;
+   ir_variable *offset =
+      new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary);
+   ir_variable *bits =
+      new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
+   ir_variable *mask =
+      new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary);
+   /* Operands: [0] base, [1] insert, [2] offset, [3] bits. */
+   if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
+      c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements);
+      c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
+      cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements);
+   } else {
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+
+      c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
+      c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
+      cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
+   }
+   /* offset and bits are each used twice below, so copy them to temps. */
+   base_ir->insert_before(offset);
+   base_ir->insert_before(assign(offset, ir->operands[2]));
+   /* NOTE(review): both take operands[0]'s type; confirm [2]/[3] share it. */
+   base_ir->insert_before(bits);
+   base_ir->insert_before(assign(bits, ir->operands[3]));
+
+   /* At least some hardware treats (x << y) as (x << (y%32)).  This means
+    * we'd get a mask of 0 when bits is 32.  Special case it.
+    *
+    * mask = bits == 32 ? 0xffffffff : ((1u << bits) - 1u) << offset;
+    *
+    * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+    *
+    *    The result will be undefined if offset or bits is negative, or if the
+    *    sum of offset and bits is greater than the number of bits used to
+    *    store the operand.
+    *
+    * So a defined bits == 32 implies offset == 0, and that arm needs no
+    * shift.  Since the rest is undefined, another way that was considered
+    * was to put the csel around the whole thing:
+    *
+    *    final_result = bits == 32 ? insert : ... ;
+    */
+   base_ir->insert_before(mask);
+
+   base_ir->insert_before(assign(mask, csel(equal(bits, c32),
+                                            cFFFFFFFF,
+                                            lshift(sub(lshift(c1, bits),
+                                                       c1->clone(ir, NULL)),
+                                                   offset))));
+
+   /* (base & ~mask) | ((insert << offset) & mask) */
+   ir->operation = ir_binop_bit_or;
+   ir->operands[0] = bit_and(ir->operands[0], bit_not(mask));
+   ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask);
+   ir->operands[2] = NULL;   /* slots 2 and 3 are not used by the bit_or */
+   ir->operands[3] = NULL;
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1188,6 +1255,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          extract_to_shifts(ir);
       break;
 
+   case ir_quadop_bitfield_insert:
+      if (lowering(INSERT_TO_SHIFTS))
+         insert_to_shifts(ir);
+      break;
+
+
    default:
       return visit_continue;
    }