nir: Don't bail too early in lower_mem_constant_vars
[mesa.git] / src / compiler / glsl / ir_expression_operation.py
index d5a06f864eb429b284fb80e8c37698018b621efb..0d8d7a6f9eddfddb61ed4a2da6d29ca3cba8b434 100644 (file)
@@ -1,4 +1,3 @@
-#! /usr/bin/env python
 #
 # Copyright (C) 2015 Intel Corporation
 #
@@ -63,7 +62,7 @@ class type_signature_iter(object):
    def __iter__(self):
       return self
 
-   def next(self):
+   def __next__(self):
       if self.i < len(self.source_types):
          i = self.i
          self.i += 1
@@ -77,17 +76,21 @@ class type_signature_iter(object):
       else:
          raise StopIteration()
 
+   next = __next__
+
 
 uint_type = type("unsigned", "u", "GLSL_TYPE_UINT")
 int_type = type("int", "i", "GLSL_TYPE_INT")
+uint64_type = type("uint64_t", "u64", "GLSL_TYPE_UINT64")
+int64_type = type("int64_t", "i64", "GLSL_TYPE_INT64")
 float_type = type("float", "f", "GLSL_TYPE_FLOAT")
 double_type = type("double", "d", "GLSL_TYPE_DOUBLE")
 bool_type = type("bool", "b", "GLSL_TYPE_BOOL")
 
-all_types = (uint_type, int_type, float_type, double_type, bool_type)
-numeric_types = (uint_type, int_type, float_type, double_type)
-signed_numeric_types = (int_type, float_type, double_type)
-integer_types = (uint_type, int_type)
+all_types = (uint_type, int_type, float_type, double_type, uint64_type, int64_type, bool_type)
+numeric_types = (uint_type, int_type, float_type, double_type, uint64_type, int64_type)
+signed_numeric_types = (int_type, float_type, double_type, int64_type)
+integer_types = (uint_type, int_type, uint64_type, int64_type)
 real_types = (float_type, double_type)
 
 # This template is for operations that can have operands of a several
@@ -103,7 +106,7 @@ constant_template_common = mako.template.Template("""\
             break;
     % endfor
          default:
-            assert(0);
+            unreachable("invalid type");
          }
       }
       break;""")
@@ -113,7 +116,7 @@ constant_template_common = mako.template.Template("""\
 constant_template_vector_scalar = mako.template.Template("""\
    case ${op.get_enum_name()}:
     % if "mixed" in op.flags:
-        % for i in xrange(op.num_operands):
+        % for i in range(op.num_operands):
       assert(op[${i}]->type->base_type == ${op.source_types[0].glsl_type} ||
             % for src_type in op.source_types[1:-1]:
              op[${i}]->type->base_type == ${src_type.glsl_type} ||
@@ -134,7 +137,7 @@ constant_template_vector_scalar = mako.template.Template("""\
             break;
     % endfor
          default:
-            assert(0);
+            unreachable("invalid type");
          }
       }
       break;""")
@@ -157,7 +160,7 @@ constant_template_mul = mako.template.Template("""\
                break;
     % endfor
             default:
-               assert(0);
+               unreachable("invalid type");
             }
          }
       } else {
@@ -179,7 +182,7 @@ constant_template_mul = mako.template.Template("""\
          for (unsigned j = 0; j < p; j++) {
             for (unsigned i = 0; i < n; i++) {
                for (unsigned k = 0; k < m; k++) {
-                  if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+                  if (op[0]->type->is_double())
                      data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j];
                   else
                      data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
@@ -215,7 +218,7 @@ constant_template_horizontal = mako.template.Template("""\
          break;
     % endfor
       default:
-         assert(0);
+         unreachable("invalid type");
       }
       break;""")
 
@@ -232,7 +235,7 @@ constant_template_vector_extract = mako.template.Template("""\
          break;
     % endfor
       default:
-         assert(0);
+         unreachable("invalid type");
       }
       break;
    }""")
@@ -244,15 +247,14 @@ constant_template_vector_insert = mako.template.Template("""\
 
       memcpy(&data, &op[0]->value, sizeof(data));
 
-      switch (this->type->base_type) {
+      switch (return_type->base_type) {
     % for dst_type, src_types in op.signatures():
       case ${src_types[0].glsl_type}:
          data.${dst_type.union_field}[idx] = op[1]->value.${src_types[0].union_field}[0];
          break;
     % endfor
       default:
-         assert(!"Should not get here.");
-         break;
+         unreachable("invalid type");
       }
       break;
    }""")
@@ -260,15 +262,15 @@ constant_template_vector_insert = mako.template.Template("""\
 # This template is for ir_quadop_vector.
 constant_template_vector = mako.template.Template("""\
    case ${op.get_enum_name()}:
-      for (unsigned c = 0; c < this->type->vector_elements; c++) {
-         switch (this->type->base_type) {
+      for (unsigned c = 0; c < return_type->vector_elements; c++) {
+         switch (return_type->base_type) {
     % for dst_type, src_types in op.signatures():
          case ${src_types[0].glsl_type}:
             data.${dst_type.union_field}[c] = op[c]->value.${src_types[0].union_field}[0];
             break;
     % endfor
          default:
-            assert(0);
+            unreachable("invalid type");
          }
       }
       break;""")
@@ -276,23 +278,20 @@ constant_template_vector = mako.template.Template("""\
 # This template is for ir_triop_lrp.
 constant_template_lrp = mako.template.Template("""\
    case ${op.get_enum_name()}: {
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
-             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
-      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
-             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
-      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
-             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[0]->type->is_float() || op[0]->type->is_double());
+      assert(op[1]->type->is_float() || op[1]->type->is_double());
+      assert(op[2]->type->is_float() || op[2]->type->is_double());
 
       unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
       for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
-         switch (this->type->base_type) {
+         switch (return_type->base_type) {
     % for dst_type, src_types in op.signatures():
          case ${src_types[0].glsl_type}:
             data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types, ("c", "c", "c2"))};
             break;
     % endfor
          default:
-            assert(0);
+            unreachable("invalid type");
          }
       }
       break;
@@ -304,14 +303,14 @@ constant_template_lrp = mako.template.Template("""\
 constant_template_csel = mako.template.Template("""\
    case ${op.get_enum_name()}:
       for (unsigned c = 0; c < components; c++) {
-         switch (this->type->base_type) {
+         switch (return_type->base_type) {
     % for dst_type, src_types in op.signatures():
          case ${src_types[1].glsl_type}:
             data.${dst_type.union_field}[c] = ${op.get_c_expression(src_types)};
             break;
     % endfor
          default:
-            assert(0);
+            unreachable("invalid type");
          }
       }
       break;""")
@@ -358,7 +357,7 @@ class operation(object):
 
 
    def get_enum_name(self):
-      return "ir_{}op_{}".format(("un", "bin", "tri", "quad")[self.num_operands-1], self.name)
+      return "ir_{0}op_{1}".format(("un", "bin", "tri", "quad")[self.num_operands-1], self.name)
 
 
    def get_template(self):
@@ -395,10 +394,10 @@ class operation(object):
 
 
    def get_c_expression(self, types, indices=("c", "c", "c")):
-      src0 = "op[0]->value.{}[{}]".format(types[0].union_field, indices[0])
-      src1 = "op[1]->value.{}[{}]".format(types[1].union_field, indices[1]) if len(types) >= 2 else "ERROR"
-      src2 = "op[2]->value.{}[{}]".format(types[2].union_field, indices[2]) if len(types) >= 3 else "ERROR"
-      src3 = "op[3]->value.{}[c]".format(types[3].union_field) if len(types) >= 4 else "ERROR"
+      src0 = "op[0]->value.{0}[{1}]".format(types[0].union_field, indices[0])
+      src1 = "op[1]->value.{0}[{1}]".format(types[1].union_field, indices[1]) if len(types) >= 2 else "ERROR"
+      src2 = "op[2]->value.{0}[{1}]".format(types[2].union_field, indices[2]) if len(types) >= 3 else "ERROR"
+      src3 = "op[3]->value.{0}[c]".format(types[3].union_field) if len(types) >= 4 else "ERROR"
 
       expr = self.c_expression[types[0].union_field] if types[0].union_field in self.c_expression else self.c_expression['default']
 
@@ -418,10 +417,10 @@ class operation(object):
 ir_expression_operation = [
    operation("bit_not", 1, printable_name="~", source_types=integer_types, c_expression="~ {src0}"),
    operation("logic_not", 1, printable_name="!", source_types=(bool_type,), c_expression="!{src0}"),
-   operation("neg", 1, source_types=numeric_types, c_expression={'u': "-((int) {src0})", 'default': "-{src0}"}),
-   operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': "{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})"}),
-   operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': "({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 'd': "double(({src0} > 0.0) - ({src0} < 0.0))"}),
-   operation("rcp", 1, source_types=real_types, c_expression={'f': "{src0} != 0.0F ? 1.0F / {src0} : 0.0F", 'd': "{src0} != 0.0 ? 1.0 / {src0} : 0.0"}),
+   operation("neg", 1, source_types=numeric_types, c_expression={'u': "-((int) {src0})", 'u64': "-((int64_t) {src0})", 'default': "-{src0}"}),
+   operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': "{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})", 'i64': "{src0} < 0 ? -{src0} : {src0}"}),
+   operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': "({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 'd': "double(({src0} > 0.0) - ({src0} < 0.0))", 'i64': "({src0} > 0) - ({src0} < 0)"}),
+   operation("rcp", 1, source_types=real_types, c_expression={'f': "1.0F / {src0}", 'd': "1.0 / {src0}"}),
    operation("rsq", 1, source_types=real_types, c_expression={'f': "1.0F / sqrtf({src0})", 'd': "1.0 / sqrt({src0})"}),
    operation("sqrt", 1, source_types=real_types, c_expression={'f': "sqrtf({src0})", 'd': "sqrt({src0})"}),
    operation("exp", 1, source_types=(float_type,), c_expression="expf({src0})"),         # Log base e on gentype
@@ -439,8 +438,10 @@ ir_expression_operation = [
    operation("f2b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0F ? true : false"),
    # Boolean-to-float conversion
    operation("b2f", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
+   # Boolean-to-float16 conversion
+   operation("b2f16", 1, source_types=(bool_type,), dest_type=float_type, c_expression="{src0} ? 1.0F : 0.0F"),
    # int-to-boolean conversion
-   operation("i2b", 1, source_types=integer_types, dest_type=bool_type, c_expression="{src0} ? true : false"),
+   operation("i2b", 1, source_types=(uint_type, int_type), dest_type=bool_type, c_expression="{src0} ? true : false"),
    # Boolean-to-int conversion
    operation("b2i", 1, source_types=(bool_type,), dest_type=int_type, c_expression="{src0} ? 1 : 0"),
    # Unsigned-to-float conversion.
@@ -453,6 +454,18 @@ ir_expression_operation = [
    operation("d2f", 1, source_types=(double_type,), dest_type=float_type, c_expression="{src0}"),
    # Float-to-double conversion.
    operation("f2d", 1, source_types=(float_type,), dest_type=double_type, c_expression="{src0}"),
+   # Half-float conversions. These all operate on and return float types,
+   # since the framework expands half to full float before calling in.  We
+   # still have to handle them here so that we can constant propagate through
+   # them, but they are no-ops.
+   operation("f2f16", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
+   operation("f2fmp", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
+   operation("f162f", 1, source_types=(float_type,), dest_type=float_type, c_expression="{src0}"),
+   # int16<->int32 conversion.
+   operation("i2i", 1, source_types=(int_type,), dest_type=int_type, c_expression="{src0}"),
+   operation("i2imp", 1, source_types=(int_type,), dest_type=int_type, c_expression="{src0}"),
+   operation("u2u", 1, source_types=(uint_type,), dest_type=uint_type, c_expression="{src0}"),
+   operation("u2ump", 1, source_types=(uint_type,), dest_type=uint_type, c_expression="{src0}"),
    # Double-to-integer conversion.
    operation("d2i", 1, source_types=(double_type,), dest_type=int_type, c_expression="{src0}"),
    # Integer-to-double conversion.
@@ -463,6 +476,8 @@ ir_expression_operation = [
    operation("u2d", 1, source_types=(uint_type,), dest_type=double_type, c_expression="{src0}"),
    # Double-to-boolean conversion.
    operation("d2b", 1, source_types=(double_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
+   # Float16-to-boolean conversion.
+   operation("f162b", 1, source_types=(float_type,), dest_type=bool_type, c_expression="{src0} != 0.0"),
    # 'Bit-identical int-to-float "conversion"
    operation("bitcast_i2f", 1, source_types=(int_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"),
    # 'Bit-identical float-to-int "conversion"
@@ -471,6 +486,37 @@ ir_expression_operation = [
    operation("bitcast_u2f", 1, source_types=(uint_type,), dest_type=float_type, c_expression="bitcast_u2f({src0})"),
    # 'Bit-identical float-to-uint "conversion"
    operation("bitcast_f2u", 1, source_types=(float_type,), dest_type=uint_type, c_expression="bitcast_f2u({src0})"),
+   # Bit-identical u64-to-double "conversion"
+   operation("bitcast_u642d", 1, source_types=(uint64_type,), dest_type=double_type, c_expression="bitcast_u642d({src0})"),
+   # Bit-identical i64-to-double "conversion"
+   operation("bitcast_i642d", 1, source_types=(int64_type,), dest_type=double_type, c_expression="bitcast_i642d({src0})"),
+   # Bit-identical double-to-u64 "conversion"
+   operation("bitcast_d2u64", 1, source_types=(double_type,), dest_type=uint64_type, c_expression="bitcast_d2u64({src0})"),
+   # Bit-identical double-to-i64 "conversion"
+   operation("bitcast_d2i64", 1, source_types=(double_type,), dest_type=int64_type, c_expression="bitcast_d2i64({src0})"),
+   # i64-to-i32 conversion
+   operation("i642i", 1, source_types=(int64_type,), dest_type=int_type, c_expression="{src0}"),
+   # u64-to-i32 conversion
+   operation("u642i", 1, source_types=(uint64_type,), dest_type=int_type, c_expression="{src0}"),
+   operation("i642u", 1, source_types=(int64_type,), dest_type=uint_type, c_expression="{src0}"),
+   operation("u642u", 1, source_types=(uint64_type,), dest_type=uint_type, c_expression="{src0}"),
+   operation("i642b", 1, source_types=(int64_type,), dest_type=bool_type, c_expression="{src0} != 0"),
+   operation("i642f", 1, source_types=(int64_type,), dest_type=float_type, c_expression="{src0}"),
+   operation("u642f", 1, source_types=(uint64_type,), dest_type=float_type, c_expression="{src0}"),
+   operation("i642d", 1, source_types=(int64_type,), dest_type=double_type, c_expression="{src0}"),
+   operation("u642d", 1, source_types=(uint64_type,), dest_type=double_type, c_expression="{src0}"),
+   operation("i2i64", 1, source_types=(int_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("u2i64", 1, source_types=(uint_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("b2i64", 1, source_types=(bool_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("f2i64", 1, source_types=(float_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("d2i64", 1, source_types=(double_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("i2u64", 1, source_types=(int_type,), dest_type=uint64_type, c_expression="{src0}"),
+   operation("u2u64", 1, source_types=(uint_type,), dest_type=uint64_type, c_expression="{src0}"),
+   operation("f2u64", 1, source_types=(float_type,), dest_type=uint64_type, c_expression="{src0}"),
+   operation("d2u64", 1, source_types=(double_type,), dest_type=uint64_type, c_expression="{src0}"),
+   operation("u642i64", 1, source_types=(uint64_type,), dest_type=int64_type, c_expression="{src0}"),
+   operation("i642u64", 1, source_types=(int64_type,), dest_type=uint64_type, c_expression="{src0}"),
+
 
    # Unary floating-point rounding operations.
    operation("trunc", 1, source_types=real_types, c_expression={'f': "truncf({src0})", 'd': "trunc({src0})"}),
@@ -482,6 +528,7 @@ ir_expression_operation = [
    # Trigonometric operations.
    operation("sin", 1, source_types=(float_type,), c_expression="sinf({src0})"),
    operation("cos", 1, source_types=(float_type,), c_expression="cosf({src0})"),
+   operation("atan", 1, source_types=(float_type,), c_expression="atan({src0})"),
 
    # Partial derivatives.
    operation("dFdx", 1, source_types=(float_type,), c_expression="0.0f"),
@@ -504,22 +551,27 @@ ir_expression_operation = [
    operation("unpack_half_2x16", 1, printable_name="unpackHalf2x16", source_types=(uint_type,), dest_type=float_type, c_expression="unpack_2x16(unpack_half_1x16, op[0]->value.u[0], &data.f[0], &data.f[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
 
    # Bit operations, part of ARB_gpu_shader5.
-   operation("bitfield_reverse", 1, source_types=integer_types, c_expression="bitfield_reverse({src0})"),
-   operation("bit_count", 1, source_types=integer_types, dest_type=int_type, c_expression="_mesa_bitcount({src0})"),
-   operation("find_msb", 1, source_types=integer_types, dest_type=int_type, c_expression={'u': "find_msb_uint({src0})", 'i': "find_msb_int({src0})"}),
-   operation("find_lsb", 1, source_types=integer_types, dest_type=int_type, c_expression="find_msb_uint({src0} & -{src0})"),
+   operation("bitfield_reverse", 1, source_types=(uint_type, int_type), c_expression="bitfield_reverse({src0})"),
+   operation("bit_count", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression="util_bitcount({src0})"),
+   operation("find_msb", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression={'u': "find_msb_uint({src0})", 'i': "find_msb_int({src0})"}),
+   operation("find_lsb", 1, source_types=(uint_type, int_type), dest_type=int_type, c_expression="find_msb_uint({src0} & -{src0})"),
+   operation("clz", 1, source_types=(uint_type,), dest_type=uint_type, c_expression="(unsigned)(31 - find_msb_uint({src0}))"),
 
    operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
 
    # Double packing, part of ARB_gpu_shader_fp64.
-   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
-   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+
+   # Sampler/Image packing, part of ARB_bindless_texture.
+   operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
 
    operation("frexp_sig", 1),
    operation("frexp_exp", 1),
 
-   operation("noise", 1),
-
    operation("subroutine_to_int", 1),
 
    # Interpolate fs input at centroid
@@ -539,17 +591,43 @@ ir_expression_operation = [
    # of its length.
    operation("ssbo_unsized_array_length", 1),
 
-   # Vote among threads on the value of the boolean argument.
-   operation("vote_any", 1),
-   operation("vote_all", 1),
-   operation("vote_eq", 1),
+   # 64-bit integer packing ops.
+   operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+   operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
 
    operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
    operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
+   operation("add_sat", 2, printable_name="add_sat", source_types=integer_types, c_expression={
+      'u': "({src0} + {src1}) < {src0} ? UINT32_MAX : ({src0} + {src1})",
+      'i': "iadd_saturate({src0}, {src1})",
+      'u64': "({src0} + {src1}) < {src0} ? UINT64_MAX : ({src0} + {src1})",
+      'i64': "iadd64_saturate({src0}, {src1})"
+   }),
+   operation("sub_sat", 2, printable_name="sub_sat", source_types=integer_types, c_expression={
+      'u': "({src1} > {src0}) ? 0 : {src0} - {src1}",
+      'i': "isub_saturate({src0}, {src1})",
+      'u64': "({src1} > {src0}) ? 0 : {src0} - {src1}",
+      'i64': "isub64_saturate({src0}, {src1})"
+   }),
+   operation("abs_sub", 2, printable_name="abs_sub", source_types=integer_types, c_expression={
+      'u': "({src1} > {src0}) ? {src1} - {src0} : {src0} - {src1}",
+      'i': "({src1} > {src0}) ? (unsigned){src1} - (unsigned){src0} : (unsigned){src0} - (unsigned){src1}",
+      'u64': "({src1} > {src0}) ? {src1} - {src0} : {src0} - {src1}",
+      'i64': "({src1} > {src0}) ? (uint64_t){src1} - (uint64_t){src0} : (uint64_t){src0} - (uint64_t){src1}",
+   }),
+   operation("avg", 2, printable_name="average", source_types=integer_types, c_expression="({src0} >> 1) + ({src1} >> 1) + (({src0} & {src1}) & 1)"),
+   operation("avg_round", 2, printable_name="average_rounded", source_types=integer_types, c_expression="({src0} >> 1) + ({src1} >> 1) + (({src0} | {src1}) & 1)"),
+
    # "Floating-point or low 32-bit integer multiply."
    operation("mul", 2, printable_name="*", source_types=numeric_types, c_expression="{src0} * {src1}"),
+   operation("mul_32x16", 2, printable_name="*", source_types=(uint_type, int_type), c_expression={
+      'u': "{src0} * (uint16_t){src1}",
+      'i': "{src0} * (int16_t){src1}"  # narrow src1 to 16 bits, mirroring the 'u' case
+   }),
    operation("imul_high", 2),       # Calculates the high 32-bits of a 64-bit multiply.
-   operation("div", 2, printable_name="/", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} / {src1}", 'i': "{src1} == 0 ? 0 : {src0} / {src1}", 'default': "{src0} / {src1}"}, flags=vector_scalar_operation),
+   operation("div", 2, printable_name="/", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} / {src1}", 'i': "{src1} == 0 ? 0 : {src0} / {src1}", 'u64': "{src1} == 0 ? 0 : {src0} / {src1}", 'i64': "{src1} == 0 ? 0 : {src0} / {src1}", 'default': "{src0} / {src1}"}, flags=vector_scalar_operation),
 
    # Returns the carry resulting from the addition of the two arguments.
    operation("carry", 2),
@@ -562,13 +640,11 @@ ir_expression_operation = [
    #
    # We don't use fmod because it rounds toward zero; GLSL specifies the use
    # of floor.
-   operation("mod", 2, printable_name="%", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} % {src1}", 'i': "{src1} == 0 ? 0 : {src0} % {src1}", 'f': "{src0} - {src1} * floorf({src0} / {src1})", 'd': "{src0} - {src1} * floor({src0} / {src1})"}, flags=vector_scalar_operation),
+   operation("mod", 2, printable_name="%", source_types=numeric_types, c_expression={'u': "{src1} == 0 ? 0 : {src0} % {src1}", 'i': "{src1} == 0 ? 0 : {src0} % {src1}", 'f': "{src0} - {src1} * floorf({src0} / {src1})", 'd': "{src0} - {src1} * floor({src0} / {src1})", 'u64': "{src1} == 0 ? 0 : {src0} % {src1}", 'i64': "{src1} == 0 ? 0 : {src0} % {src1}"}, flags=vector_scalar_operation),
 
    # Binary comparison operators which return a boolean vector.
    # The type of both operands must be equal.
    operation("less", 2, printable_name="<", source_types=numeric_types, dest_type=bool_type, c_expression="{src0} < {src1}"),
-   operation("greater", 2, printable_name=">", source_types=numeric_types, dest_type=bool_type, c_expression="{src0} > {src1}"),
-   operation("lequal", 2, printable_name="<=", source_types=numeric_types, dest_type=bool_type, c_expression="{src0} <= {src1}"),
    operation("gequal", 2, printable_name=">=", source_types=numeric_types, dest_type=bool_type, c_expression="{src0} >= {src1}"),
    operation("equal", 2, printable_name="==", source_types=all_types, dest_type=bool_type, c_expression="{src0} == {src1}"),
    operation("nequal", 2, printable_name="!=", source_types=all_types, dest_type=bool_type, c_expression="{src0} != {src1}"),
@@ -629,6 +705,8 @@ ir_expression_operation = [
    # operand1 is the sample ID
    operation("interpolate_at_sample", 2),
 
+   operation("atan2", 2, source_types=(float_type,), c_expression="atan2({src0}, {src1})"),
+
    # Fused floating-point multiply-add, part of ARB_gpu_shader5.
    operation("fma", 3, source_types=real_types, c_expression="{src0} * {src1} + {src2}"),
 
@@ -708,6 +786,12 @@ const char *const ir_expression_operation_strings[] = {
 % for item in values:
    "${item.printable_name}",
 % endfor
+};
+
+const char *const ir_expression_operation_enum_strings[] = {
+% for item in values:
+   "${item.name}",
+% endfor
 };""")
 
    constant_template = mako.template.Template("""\