glsl, nir: Make ir_quadop_bitfield_insert a vectorized operation.
author: Kenneth Graunke <kenneth@whitecape.org>
Tue, 5 Jan 2016 12:01:11 +0000 (04:01 -0800)
committer: Matt Turner <mattst88@gmail.com>
Wed, 13 Jan 2016 18:35:12 +0000 (10:35 -0800)
We would like to be able to combine

   result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x);
   result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y);
   result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z);
   result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w);

into a single ivec4 bitfieldInsert operation.  This should be possible
with most drivers.

This patch changes the offset and bits parameters from scalar ints
to ivecN or uvecN.  The type of all four operands will be the same,
for simplicity.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/glsl/builtin_functions.cpp
src/glsl/ir.h
src/glsl/ir_constant_expression.cpp
src/glsl/ir_validate.cpp
src/glsl/lower_instructions.cpp
src/glsl/lower_packing_builtins.cpp
src/glsl/nir/nir_opcodes.py

index 602852a78d1092a6357b1fb81919be28a7e902dc..38383bc1988791d39acafd58d75a56e692b05bc6 100644 (file)
@@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type *type)
 ir_function_signature *
 builtin_builder::_bitfieldInsert(const glsl_type *type)
 {
+   bool is_uint = type->base_type == GLSL_TYPE_UINT;
    ir_variable *base   = in_var(type, "base");
    ir_variable *insert = in_var(type, "insert");
    ir_variable *offset = in_var(glsl_type::int_type, "offset");
    ir_variable *bits   = in_var(glsl_type::int_type, "bits");
    MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
 
-   body.emit(ret(bitfield_insert(base, insert, offset, bits)));
+   operand cast_offset = is_uint ? i2u(offset) : operand(offset);
+   operand cast_bits = is_uint ? i2u(bits) : operand(bits);
+
+   body.emit(ret(bitfield_insert(base, insert,
+      swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
+      swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
 
    return sig;
 }
index a2eb508e9eafe43f65a1f93d459286aab5404114..9af2fc16e16f9e8570c28ace99eb5e468eb1ff52 100644 (file)
@@ -1710,7 +1710,6 @@ public:
              operation == ir_triop_vector_insert ||
              operation == ir_quadop_vector ||
              /* TODO: these can't currently be vectorized */
-             operation == ir_quadop_bitfield_insert ||
              operation == ir_triop_bitfield_extract;
    }
 
index 38b6dd59759a63e4bde7d5f0efae8d30c70e9085..f5b5bd87b6b13c962feb28dc1f7ff5191b3d24e9 100644 (file)
@@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
    }
 
    case ir_quadop_bitfield_insert: {
-      int offset = op[2]->value.i[0];
-      int bits = op[3]->value.i[0];
-
       for (unsigned c = 0; c < components; c++) {
+         int offset = op[2]->value.i[c];
+         int bits = op[3]->value.i[c];
+
          if (bits == 0)
             data.u[c] = op[0]->value.u[c];
          else if (offset < 0 || bits < 0)
index a4d61822faab334ef0dbf4fe403c6399e85b24b1..fea9b760eca803ab3818f35feb52648b05a6c312 100644 (file)
@@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_quadop_bitfield_insert:
+      assert(ir->type->is_integer());
       assert(ir->operands[0]->type == ir->type);
       assert(ir->operands[1]->type == ir->type);
-      assert(ir->operands[2]->type == glsl_type::int_type);
-      assert(ir->operands[3]->type == glsl_type::int_type);
+      assert(ir->operands[2]->type == ir->type);
+      assert(ir->operands[3]->type == ir->type);
       break;
 
    case ir_quadop_vector:
index f70db873fd7b33652358e2c79997e08155d0809e..d140be346cf041112774343a202880b2420db49b 100644 (file)
@@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 
    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
 
-   ir_constant *exp_shift = new(ir) ir_constant(23);
-   ir_constant *exp_width = new(ir) ir_constant(8);
+   ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
+   ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
 
    /* Temporary variables */
    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
@@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
 
    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
 
-   ir_constant *exp_shift = new(ir) ir_constant(20);
-   ir_constant *exp_width = new(ir) ir_constant(11);
+   ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem);
+   ir_constant *exp_width = new(ir) ir_constant(11, vec_elem);
    ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
 
    /* Temporary variables */
index c8bf68be829a395c3cde2ceb22e25ff6e3460555..19eeaa3775c5928066d0fc50f10fa677dbfa3e72 100644 (file)
@@ -230,8 +230,8 @@ private:
       if (op_mask & LOWER_PACK_USE_BFI) {
          return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
                                 swizzle_y(u),
-                                constant(16),
-                                constant(16));
+                                constant(16u),
+                                constant(16u));
       }
 
       /* return (u.y << 16) | (u.x & 0xffff); */
@@ -261,9 +261,9 @@ private:
          return bitfield_insert(bitfield_insert(
                                    bitfield_insert(
                                       bit_and(swizzle_x(u), constant(0xffu)),
-                                      swizzle_y(u), constant(8), constant(8)),
-                                   swizzle_z(u), constant(16), constant(8)),
-                                swizzle_w(u), constant(24), constant(8));
+                                      swizzle_y(u), constant(8u), constant(8u)),
+                                   swizzle_z(u), constant(16u), constant(8u)),
+                                swizzle_w(u), constant(24u), constant(8u));
       }
 
       /* uvec4 u = UVEC4_RVAL & 0xff */
index 398ae50f9f77d04496a19d52c6b9de98ad15d590..3780628d1ea9faba25f3613ec5770be0558019a3 100644 (file)
@@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
           [tuint, tuint, tuint, tuint],
           "", const_expr)
 
-opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
+opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
        [tuint, tuint, tint, tint], "", """
 unsigned base = src0, insert = src1;
-int offset = src2.x, bits = src3.x;
+int offset = src2, bits = src3;
 if (bits == 0) {
    dst = 0;
 } else if (offset < 0 || bits < 0 || bits + offset > 32) {