nir: Add a store_reg helper and use the builder in phis_to_regs

[mesa.git] / src / compiler / nir / nir_opcodes.py
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py

index 9e335445a3142f9b2ddb4077b6ffdb0ebb2d46f8..e1c9788b4f05e7c73fe5a2e2717eee9b0a7377ee 100644 (file)
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -199,9 +199,7 @@ unop("fsign", tfloat, ("bit_size == 64 ? " +
  unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
  unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
  unop("fabs", tfloat, "fabs(src0)")
-unop("fsat", tfloat, ("bit_size == 64 ? " +
-                      "((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : " +
-                      "((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0))"))
+unop("fsat", tfloat, ("fmin(fmax(src0, 0.0), 1.0)"))
  unop("frcp", tfloat, "bit_size == 64 ? 1.0 / src0 : 1.0f / src0")
  unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)")
  unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)")
@@ -211,7 +209,7 @@ unop("flog2", tfloat, "log2f(src0)")
  # Generate all of the numeric conversion opcodes
  for src_t in [tint, tuint, tfloat, tbool]:
     if src_t == tbool:
-      dst_types = [tfloat, tint]
+      dst_types = [tfloat, tint, tbool]
     elif src_t == tint:
        dst_types = [tfloat, tint, tbool]
     elif src_t == tuint:
@@ -458,12 +456,6 @@ for (unsigned bit = 0; bit < bit_size; bit++) {
  }
  """)
  
-
-for i in range(1, 5):
-   for j in range(1, 5):
-      unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
-
-
  # AMD_gcn_shader extended instructions
  unop_horiz("cube_face_coord", 2, tfloat32, 3, tfloat32, """
  dst.x = dst.y = 0.0;
@@ -488,9 +480,9 @@ dst.y = dst.y / ma + 0.5;
  """)
  
  unop_horiz("cube_face_index", 1, tfloat32, 3, tfloat32, """
-float absX = fabs(src0.x);
-float absY = fabs(src0.y);
-float absZ = fabs(src0.z);
+float absX = fabsf(src0.x);
+float absY = fabsf(src0.y);
+float absZ = fabsf(src0.z);
  if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0;
  if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1;
  if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2;
@@ -1129,9 +1121,9 @@ binop("amul", tint, _2src_commutative + associative, "src0 * src1")
  # ir3-specific instruction that maps directly to mul-add shift high mix,
  # (IMADSH_MIX16 i.e. ah * bl << 16 + c). It is used for lowering integer
  # multiplication (imul) on Freedreno backend..
-opcode("imadsh_mix16", 1, tint32,
-       [1, 1, 1], [tint32, tint32, tint32], False, "", """
-dst.x = ((((src0.x & 0xffff0000) >> 16) * (src1.x & 0x0000ffff)) << 16) + src2.x;
+opcode("imadsh_mix16", 0, tint32,
+       [0, 0, 0], [tint32, tint32, tint32], False, "", """
+dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2;
  """)
  
  # ir3-specific instruction that maps directly to ir3 mad.s24.
@@ -1143,3 +1135,11 @@ triop("imad24_ir3", tint32, _2src_commutative,
  # 24b multiply into 32b result (with sign extension)
  binop("imul24", tint32, _2src_commutative + associative,
        "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")
+
+# unsigned 24b multiply into 32b result plus 32b int
+triop("umad24", tuint32, _2src_commutative,
+      "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8) + src2")
+
+# unsigned 24b multiply into 32b unsigned result (each source masked to its low 24 bits)
+binop("umul24", tuint32, _2src_commutative + associative,
+      "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8)")