glsl: Generate code for constant ir_quadop_vector expressions

[mesa.git] / src / compiler / glsl / ir_expression_operation.py
diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py

index 9e257f8f8a4a97822904d13dc65e5c0830aefc6b..5c899a743ee1ee8e6bce927acdd4be319472e04f 100644 (file)
--- a/src/compiler/glsl/ir_expression_operation.py
+++ b/src/compiler/glsl/ir_expression_operation.py
@@ -91,12 +91,20 @@ integer_types = (uint_type, int_type)
  real_types = (float_type, double_type)
  
  # This template is for unary and binary operations that can only have operands
-# of a single type.  ir_unop_logic_not is an example.
+# of a single type, or whose implementation is identical for all types.
+# ir_unop_logic_not is an example of the former, and ir_quadop_bitfield_insert
+# is an example of the latter.
  constant_template0 = mako.template.Template("""\
     case ${op.get_enum_name()}:
+    % if len(op.source_types) == 1:
        assert(op[0]->type->base_type == ${op.source_types[0].glsl_type});
+    % endif
        for (unsigned c = 0; c < op[0]->type->components(); c++)
-         data.${op.source_types[0].union_field}[c] = ${op.get_c_expression(op.source_types)};
+    % for dst_type, src_types in op.signatures():
+        % if loop.index == 0:
+         data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types)};
+        % endif
+    % endfor
        break;""")
  
  # This template is for unary operations that can have operands of a several
@@ -163,7 +171,17 @@ constant_template5 = mako.template.Template("""\
  # of scalar and vector operands.
  constant_template_vector_scalar = mako.template.Template("""\
     case ${op.get_enum_name()}:
+    % if "mixed" in op.flags:
+        % for i in xrange(op.num_operands):
+      assert(op[${i}]->type->base_type == ${op.source_types[0].glsl_type} ||
+            % for src_type in op.source_types[1:-1]:
+             op[${i}]->type->base_type == ${src_type.glsl_type} ||
+            % endfor
+             op[${i}]->type->base_type == ${op.source_types[-1].glsl_type});
+        % endfor
+    % else:
        assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+    % endif
        for (unsigned c = 0, c0 = 0, c1 = 0;
             c < components;
             c0 += c0_inc, c1 += c1_inc, c++) {
@@ -171,7 +189,7 @@ constant_template_vector_scalar = mako.template.Template("""\
           switch (op[0]->type->base_type) {
      % for dst_type, src_types in op.signatures():
           case ${src_types[0].glsl_type}:
-            data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types, ("c0", "c1"))};
+            data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types, ("c0", "c1", "c2"))};
              break;
      % endfor
           default:
@@ -180,6 +198,56 @@ constant_template_vector_scalar = mako.template.Template("""\
        }
        break;""")
  
+# This template is for multiplication.  It is unique because it has to support
+# matrix * vector and matrix * matrix operations, which are not component-wise.
+constant_template_mul = mako.template.Template("""\
+   case ${op.get_enum_name()}:
+      /* Check for equal types, or unequal types involving scalars */
+      if ((op[0]->type == op[1]->type && !op[0]->type->is_matrix())
+          || op0_scalar || op1_scalar) {
+         for (unsigned c = 0, c0 = 0, c1 = 0;
+              c < components;
+              c0 += c0_inc, c1 += c1_inc, c++) {
+
+            switch (op[0]->type->base_type) {
+    % for dst_type, src_types in op.signatures():
+            case ${src_types[0].glsl_type}:
+               data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types, ("c0", "c1", "c2"))};
+               break;
+    % endfor
+            default:
+               assert(0);
+            }
+         }
+      } else {
+         assert(op[0]->type->is_matrix() || op[1]->type->is_matrix());
+
+         /* Multiply an N-by-M matrix with an M-by-P matrix.  Since either
+          * matrix can be a GLSL vector, either N or P can be 1.
+          *
+          * For vec*mat, the vector is treated as a row vector.  This
+          * means the vector is a 1-row x M-column matrix.
+          *
+          * For mat*vec, the vector is treated as a column vector.  Since
+          * matrix_columns is 1 for vectors, this just works.
+          */
+         const unsigned n = op[0]->type->is_vector()
+            ? 1 : op[0]->type->vector_elements;
+         const unsigned m = op[1]->type->vector_elements;
+         const unsigned p = op[1]->type->matrix_columns;
+         for (unsigned j = 0; j < p; j++) {
+            for (unsigned i = 0; i < n; i++) {
+               for (unsigned k = 0; k < m; k++) {
+                  if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+                     data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j];
+                  else
+                     data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
+               }
+            }
+         }
+      }
+      break;""")
+
  # This template is for operations that are horizontal and either have only a
  # single type or the implementation for all types is identical.  That is, the
  # operation consumes a vector and produces a scalar.
@@ -188,13 +256,91 @@ constant_template_horizontal_single_implementation = mako.template.Template("""\
        data.${op.dest_type.union_field}[0] = ${op.c_expression['default']};
        break;""")
  
+# This template is for operations that are horizontal and do not assign the
+# result.  The various unpack operations are examples.
+constant_template_horizontal_nonassignment = mako.template.Template("""\
+   case ${op.get_enum_name()}:
+      ${op.c_expression['default']};
+      break;""")
+
+# This template is for binary operations that are horizontal.  That is, the
+# operation consumes a vector and produces a scalar.
+constant_template_horizontal = mako.template.Template("""\
+   case ${op.get_enum_name()}:
+      switch (op[0]->type->base_type) {
+    % for dst_type, src_types in op.signatures():
+      case ${src_types[0].glsl_type}:
+         data.${dst_type.union_field}[0] = ${op.get_c_expression(src_types)};
+         break;
+    % endfor
+      default:
+         assert(0);
+      }
+      break;""")
+
+# This template is for ir_binop_vector_extract.
+constant_template_vector_extract = mako.template.Template("""\
+   case ${op.get_enum_name()}: {
+      const int c = CLAMP(op[1]->value.i[0], 0,
+                          (int) op[0]->type->vector_elements - 1);
+
+      switch (op[0]->type->base_type) {
+    % for dst_type, src_types in op.signatures():
+      case ${src_types[0].glsl_type}:
+         data.${dst_type.union_field}[0] = op[0]->value.${src_types[0].union_field}[c];
+         break;
+    % endfor
+      default:
+         assert(0);
+      }
+      break;
+   }""")
+
+# This template is for ir_triop_vector_insert.
+constant_template_vector_insert = mako.template.Template("""\
+   case ${op.get_enum_name()}: {
+      const unsigned idx = op[2]->value.u[0];
+
+      memcpy(&data, &op[0]->value, sizeof(data));
+
+      switch (this->type->base_type) {
+    % for dst_type, src_types in op.signatures():
+      case ${src_types[0].glsl_type}:
+         data.${dst_type.union_field}[idx] = op[1]->value.${src_types[0].union_field}[0];
+         break;
+    % endfor
+      default:
+         assert(!"Should not get here.");
+         break;
+      }
+      break;
+   }""")
+
+# This template is for ir_quadop_vector.
+constant_template_vector = mako.template.Template("""\
+   case ${op.get_enum_name()}:
+      for (unsigned c = 0; c < this->type->vector_elements; c++) {
+         switch (this->type->base_type) {
+    % for dst_type, src_types in op.signatures():
+         case ${src_types[0].glsl_type}:
+            data.${dst_type.union_field}[c] = op[c]->value.${src_types[0].union_field}[0];
+            break;
+    % endfor
+         default:
+            assert(0);
+         }
+      }
+      break;""")
+
  
  vector_scalar_operation = "vector-scalar"
  horizontal_operation = "horizontal"
  types_identical_operation = "identical"
+non_assign_operation = "nonassign"
+mixed_type_operation = "mixed"
  
  class operation(object):
-   def __init__(self, name, num_operands, printable_name = None, source_types = None, dest_type = None, c_expression = None, flags = None):
+   def __init__(self, name, num_operands, printable_name = None, source_types = None, dest_type = None, c_expression = None, flags = None, all_signatures = None):
        self.name = name
        self.num_operands = num_operands
  
@@ -203,7 +349,13 @@ class operation(object):
        else:
           self.printable_name = printable_name
  
-      self.source_types = source_types
+      self.all_signatures = all_signatures
+
+      if source_types is None:
+         self.source_types = tuple()
+      else:
+         self.source_types = source_types
+
        self.dest_type = dest_type
  
        if c_expression is None:
@@ -230,7 +382,9 @@ class operation(object):
           return None
  
        if self.num_operands == 1:
-         if horizontal_operation in self.flags:
+         if horizontal_operation in self.flags and non_assign_operation in self.flags:
+            return constant_template_horizontal_nonassignment.render(op=self)
+         elif horizontal_operation in self.flags:
              return constant_template_horizontal_single_implementation.render(op=self)
           elif self.dest_type is not None and len(self.source_types) == 1:
              return constant_template2.render(op=self)
@@ -243,30 +397,55 @@ class operation(object):
           else:
              return constant_template3.render(op=self)
        elif self.num_operands == 2:
-         if vector_scalar_operation in self.flags:
+         if self.name == "mul":
+            return constant_template_mul.render(op=self)
+         elif self.name == "vector_extract":
+            return constant_template_vector_extract.render(op=self)
+         elif vector_scalar_operation in self.flags:
              return constant_template_vector_scalar.render(op=self)
           elif horizontal_operation in self.flags and types_identical_operation in self.flags:
              return constant_template_horizontal_single_implementation.render(op=self)
+         elif horizontal_operation in self.flags:
+            return constant_template_horizontal.render(op=self)
           elif len(self.source_types) == 1:
              return constant_template0.render(op=self)
           elif self.dest_type is not None:
              return constant_template5.render(op=self)
+         else:
+            return constant_template3.render(op=self)
+      elif self.num_operands == 3:
+         if self.name == "vector_insert":
+            return constant_template_vector_insert.render(op=self)
+         else:
+            return constant_template3.render(op=self)
+      elif self.num_operands == 4:
+         if self.name == "vector":
+            return constant_template_vector.render(op=self)
+         elif types_identical_operation in self.flags:
+            return constant_template0.render(op=self)
  
        return None
  
  
-   def get_c_expression(self, types, indices=("c", "c")):
+   def get_c_expression(self, types, indices=("c", "c", "c")):
        src0 = "op[0]->value.{}[{}]".format(types[0].union_field, indices[0])
        src1 = "op[1]->value.{}[{}]".format(types[1].union_field, indices[1]) if len(types) >= 2 else "ERROR"
+      src2 = "op[2]->value.{}[{}]".format(types[2].union_field, indices[2]) if len(types) >= 3 else "ERROR"
+      src3 = "op[3]->value.{}[c]".format(types[3].union_field) if len(types) >= 4 else "ERROR"
  
        expr = self.c_expression[types[0].union_field] if types[0].union_field in self.c_expression else self.c_expression['default']
  
        return expr.format(src0=src0,
-                         src1=src1)
+                         src1=src1,
+                         src2=src2,
+                         src3=src3)
  
  
     def signatures(self):
-      return type_signature_iter(self.dest_type, self.source_types, self.num_operands)
+      if self.all_signatures is not None:
+         return self.all_signatures
+      else:
+         return type_signature_iter(self.dest_type, self.source_types, self.num_operands)
  
  
  ir_expression_operation = [
@@ -351,11 +530,11 @@ ir_expression_operation = [
     operation("pack_unorm_2x16", 1, printable_name="packUnorm2x16", source_types=(float_type,), dest_type=uint_type, c_expression="pack_2x16(pack_unorm_1x16, op[0]->value.f[0], op[0]->value.f[1])", flags=horizontal_operation),
     operation("pack_unorm_4x8", 1, printable_name="packUnorm4x8", source_types=(float_type,), dest_type=uint_type, c_expression="pack_4x8(pack_unorm_1x8, op[0]->value.f[0], op[0]->value.f[1], op[0]->value.f[2], op[0]->value.f[3])", flags=horizontal_operation),
     operation("pack_half_2x16", 1, printable_name="packHalf2x16", source_types=(float_type,), dest_type=uint_type, c_expression="pack_2x16(pack_half_1x16, op[0]->value.f[0], op[0]->value.f[1])", flags=horizontal_operation),
-   operation("unpack_snorm_2x16", 1, printable_name="unpackSnorm2x16"),
-   operation("unpack_snorm_4x8", 1, printable_name="unpackSnorm4x8"),
-   operation("unpack_unorm_2x16", 1, printable_name="unpackUnorm2x16"),
-   operation("unpack_unorm_4x8", 1, printable_name="unpackUnorm4x8"),
-   operation("unpack_half_2x16", 1, printable_name="unpackHalf2x16"),
+   operation("unpack_snorm_2x16", 1, printable_name="unpackSnorm2x16", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_2x16(unpack_snorm_1x16, op[0]->value.u[0], &data.f[0], &data.f[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_snorm_4x8", 1, printable_name="unpackSnorm4x8", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_4x8(unpack_snorm_1x8, op[0]->value.u[0], &data.f[0], &data.f[1], &data.f[2], &data.f[3])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_unorm_2x16", 1, printable_name="unpackUnorm2x16", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_2x16(unpack_unorm_1x16, op[0]->value.u[0], &data.f[0], &data.f[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_unorm_4x8", 1, printable_name="unpackUnorm4x8", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_4x8(unpack_unorm_1x8, op[0]->value.u[0], &data.f[0], &data.f[1], &data.f[2], &data.f[3])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_half_2x16", 1, printable_name="unpackHalf2x16", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_2x16(unpack_half_1x16, op[0]->value.u[0], &data.f[0], &data.f[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
  
     # Bit operations, part of ARB_gpu_shader5.
     operation("bitfield_reverse", 1, source_types=integer_types, c_expression="bitfield_reverse({src0})"),
@@ -366,8 +545,8 @@ ir_expression_operation = [
     operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
  
     # Double packing, part of ARB_gpu_shader_fp64.
-   operation("pack_double_2x32", 1, printable_name="packDouble2x32"),
-   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32"),
+   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
  
     operation("frexp_sig", 1),
     operation("frexp_exp", 1),
@@ -401,7 +580,7 @@ ir_expression_operation = [
     operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
     operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
     # "Floating-point or low 32-bit integer multiply."
-   operation("mul", 2, printable_name="*"),
+   operation("mul", 2, printable_name="*", source_types=numeric_types, c_expression="{src0} * {src1}"),
     operation("imul_high", 2),       # Calculates the high 32-bits of a 64-bit multiply.
     operation("div", 2, printable_name="/", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} / {src1}", 'i': "{src1} == 0 ? 0 : {src0} / {src1}", 'default': "{src0} / {src1}"}, flags=vector_scalar_operation),
  
@@ -436,8 +615,8 @@ ir_expression_operation = [
     operation("any_nequal", 2, source_types=all_types, dest_type=bool_type, c_expression="!op[0]->has_value(op[1])", flags=frozenset((horizontal_operation, types_identical_operation))),
  
     # Bit-wise binary operations.
-   operation("lshift", 2, printable_name="<<"),
-   operation("rshift", 2, printable_name=">>"),
+   operation("lshift", 2, printable_name="<<", source_types=integer_types, c_expression="{src0} << {src1}", flags=frozenset((vector_scalar_operation, mixed_type_operation))),
+   operation("rshift", 2, printable_name=">>", source_types=integer_types, c_expression="{src0} >> {src1}", flags=frozenset((vector_scalar_operation, mixed_type_operation))),
     operation("bit_and", 2, printable_name="&", source_types=integer_types, c_expression="{src0} & {src1}", flags=vector_scalar_operation),
     operation("bit_xor", 2, printable_name="^", source_types=integer_types, c_expression="{src0} ^ {src1}", flags=vector_scalar_operation),
     operation("bit_or", 2, printable_name="|", source_types=integer_types, c_expression="{src0} | {src1}", flags=vector_scalar_operation),
@@ -446,7 +625,7 @@ ir_expression_operation = [
     operation("logic_xor", 2, printable_name="^^", source_types=(bool_type,), c_expression="{src0} != {src1}"),
     operation("logic_or", 2, printable_name="||", source_types=(bool_type,), c_expression="{src0} || {src1}"),
  
-   operation("dot", 2),
+   operation("dot", 2, source_types=real_types, c_expression={'f': "dot_f(op[0], op[1])", 'd': "dot_d(op[0], op[1])"}, flags=horizontal_operation),
     operation("min", 2, source_types=numeric_types, c_expression="MIN2({src0}, {src1})", flags=vector_scalar_operation),
     operation("max", 2, source_types=numeric_types, c_expression="MAX2({src0}, {src1})", flags=vector_scalar_operation),
  
@@ -459,13 +638,17 @@ ir_expression_operation = [
     operation("ubo_load", 2),
  
     # Multiplies a number by two to a power, part of ARB_gpu_shader5.
-   operation("ldexp", 2),
+   operation("ldexp", 2,
+             all_signatures=((float_type, (float_type, int_type)),
+                             (double_type, (double_type, int_type))),
+             c_expression={'f': "ldexpf_flush_subnormal({src0}, {src1})",
+                           'd': "ldexp_flush_subnormal({src0}, {src1})"}),
  
     # Extract a scalar from a vector
     #
     # operand0 is the vector
     # operand1 is the index of the field to read from operand0
-   operation("vector_extract", 2),
+   operation("vector_extract", 2, source_types=all_types, c_expression="anything-except-None"),
  
     # Interpolate fs input at offset
     #
@@ -480,7 +663,7 @@ ir_expression_operation = [
     operation("interpolate_at_sample", 2),
  
     # Fused floating-point multiply-add, part of ARB_gpu_shader5.
-   operation("fma", 3),
+   operation("fma", 3, source_types=real_types, c_expression="{src0} * {src1} + {src2}"),
  
     operation("lrp", 3),
  
@@ -492,18 +675,26 @@ ir_expression_operation = [
     # See also lower_instructions_visitor::ldexp_to_arith
     operation("csel", 3),
  
-   operation("bitfield_extract", 3),
+   operation("bitfield_extract", 3,
+             all_signatures=((int_type, (uint_type, int_type, int_type)),
+                             (int_type, (int_type, int_type, int_type))),
+             c_expression={'u': "bitfield_extract_uint({src0}, {src1}, {src2})",
+                           'i': "bitfield_extract_int({src0}, {src1}, {src2})"}),
  
     # Generate a value with one field of a vector changed
     #
     # operand0 is the vector
     # operand1 is the value to write into the vector result
     # operand2 is the index in operand0 to be modified
-   operation("vector_insert", 3),
+   operation("vector_insert", 3, source_types=all_types, c_expression="anything-except-None"),
  
-   operation("bitfield_insert", 4),
+   operation("bitfield_insert", 4,
+             all_signatures=((uint_type, (uint_type, uint_type, int_type, int_type)),
+                             (int_type, (int_type, int_type, int_type, int_type))),
+             c_expression="bitfield_insert({src0}, {src1}, {src2}, {src3})",
+             flags=types_identical_operation),
  
-   operation("vector", 4),
+   operation("vector", 4, source_types=all_types, c_expression="anything-except-None"),
  ]