nir/opcodes: Add new 'umul_low' and 'imadsh_mix16' opcodes

[mesa.git] / src / compiler / nir / nir_opcodes.py
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py

index fee9c221c163e852e7f1e13f08d914a0ceeb90f1..1ab4a3e7a315aaba1eb79fa0eb82d03728488b48 100644 (file)
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -185,10 +185,7 @@ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
  def unop_numeric_convert(name, out_type, in_type, const_expr):
     opcode(name, 0, out_type, [0], [in_type], True, "", const_expr)
  
-# These two move instructions differ in what modifiers they support and what
-# the negate modifier means. Otherwise, they are identical.
-unop("fmov", tfloat, "src0")
-unop("imov", tint, "src0")
+unop("mov", tuint, "src0")
  
  unop("ineg", tint, "-src0")
  unop("fneg", tfloat, "-src0")
@@ -555,6 +552,13 @@ if (bit_size == 64) {
  }
  """)
  
+# unsigned multiply of the low halves (low bit_size / 2 bits) of src0 and src1
+binop("umul_low", tuint32, _2src_commutative, """
+uint64_t mask = (1 << (bit_size / 2)) - 1;
+dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);
+""")
+
+
  binop("fdiv", tfloat, "", "src0 / src1")
  binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
  binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
@@ -816,16 +820,16 @@ binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
  binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
  
  
-def triop(name, ty, const_expr):
-   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, "", const_expr)
+def triop(name, ty, alg_props, const_expr):
+   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, alg_props, const_expr)
  def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
     opcode(name, output_size, tuint,
     [src1_size, src2_size, src3_size],
     [tuint, tuint, tuint], False, "", const_expr)
  
-triop("ffma", tfloat, "src0 * src1 + src2")
+triop("ffma", tfloat, _2src_commutative, "src0 * src1 + src2")
  
-triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
  
  # Conditional Select
  #
@@ -834,20 +838,20 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
  # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
  
  
-triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
+triop("fcsel", tfloat32, "", "(src0 != 0.0f) ? src1 : src2")
  
  # 3 way min/max/med
-triop("fmin3", tfloat, "fminf(src0, fminf(src1, src2))")
-triop("imin3", tint, "MIN2(src0, MIN2(src1, src2))")
-triop("umin3", tuint, "MIN2(src0, MIN2(src1, src2))")
+triop("fmin3", tfloat, "", "fminf(src0, fminf(src1, src2))")
+triop("imin3", tint, "", "MIN2(src0, MIN2(src1, src2))")
+triop("umin3", tuint, "", "MIN2(src0, MIN2(src1, src2))")
  
-triop("fmax3", tfloat, "fmaxf(src0, fmaxf(src1, src2))")
-triop("imax3", tint, "MAX2(src0, MAX2(src1, src2))")
-triop("umax3", tuint, "MAX2(src0, MAX2(src1, src2))")
+triop("fmax3", tfloat, "", "fmaxf(src0, fmaxf(src1, src2))")
+triop("imax3", tint, "", "MAX2(src0, MAX2(src1, src2))")
+triop("umax3", tuint, "", "MAX2(src0, MAX2(src1, src2))")
  
-triop("fmed3", tfloat, "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1))")
-triop("imed3", tint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
-triop("umed3", tuint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
+triop("fmed3", tfloat, "", "fmaxf(fminf(fmaxf(src0, src1), src2), fminf(src0, src1))")
+triop("imed3", tint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
+triop("umed3", tuint, "", "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
  
  opcode("bcsel", 0, tuint, [0, 0, 0],
        [tbool1, tuint, tuint], False, "", "src0 ? src1 : src2")
@@ -855,7 +859,7 @@ opcode("b32csel", 0, tuint, [0, 0, 0],
         [tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
  
  # SM5 bfi assembly
-triop("bfi", tuint32, """
+triop("bfi", tuint32, "", """
  unsigned mask = src0, insert = src1, base = src2;
  if (mask == 0) {
     dst = base;
@@ -961,4 +965,10 @@ dst.z = src2.x;
  dst.w = src3.x;
  """)
  
-
+# ir3-specific instruction that maps directly to the mul-add shift high mix
+# instruction (IMADSH_MIX16, i.e. ((ah * bl) << 16) + c). It is used for
+# lowering integer multiplication (imul) on the Freedreno backend.
+opcode("imadsh_mix16", 1, tint32,
+       [1, 1, 1], [tint32, tint32, tint32], False, "", """
+dst.x = ((((src0.x & 0xffff0000) >> 16) * (src1.x & 0x0000ffff)) << 16) + src2.x;
+""")