unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
unop("fabs", tfloat, "fabs(src0)")
-unop("fsat", tfloat, ("bit_size == 64 ? " +
- "((src0 > 1.0) ? 1.0 : ((src0 <= 0.0) ? 0.0 : src0)) : " +
- "((src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0))"))
+unop("fsat", tfloat, ("fmin(fmax(src0, 0.0), 1.0)"))
unop("frcp", tfloat, "bit_size == 64 ? 1.0 / src0 : 1.0f / src0")
unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)")
# Generate all of the numeric conversion opcodes
for src_t in [tint, tuint, tfloat, tbool]:
if src_t == tbool:
- dst_types = [tfloat, tint]
+ dst_types = [tfloat, tint, tbool]
elif src_t == tint:
dst_types = [tfloat, tint, tbool]
elif src_t == tuint:
}
""")
-
-for i in range(1, 5):
- for j in range(1, 5):
- unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
-
-
# AMD_gcn_shader extended instructions
unop_horiz("cube_face_coord", 2, tfloat32, 3, tfloat32, """
dst.x = dst.y = 0.0;
""")
unop_horiz("cube_face_index", 1, tfloat32, 3, tfloat32, """
-float absX = fabs(src0.x);
-float absY = fabs(src0.y);
-float absZ = fabs(src0.z);
+float absX = fabsf(src0.x);
+float absY = fabsf(src0.y);
+float absZ = fabsf(src0.z);
if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0;
if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1;
if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2;
# ir3-specific instruction that maps directly to mul-add shift high mix,
# (IMADSH_MIX16 i.e. ah * bl << 16 + c). It is used for lowering integer
# multiplication (imul) on Freedreno backend..
-opcode("imadsh_mix16", 1, tint32,
- [1, 1, 1], [tint32, tint32, tint32], False, "", """
-dst.x = ((((src0.x & 0xffff0000) >> 16) * (src1.x & 0x0000ffff)) << 16) + src2.x;
+opcode("imadsh_mix16", 0, tint32,
+ [0, 0, 0], [tint32, tint32, tint32], False, "", """
+dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2;
""")
# ir3-specific instruction that maps directly to ir3 mad.s24.
# 24b multiply into 32b result (with sign extension)
binop("imul24", tint32, _2src_commutative + associative,
"(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")
+
+# unsigned 24b multiply into 32b result plus 32b int
+triop("umad24", tuint32, _2src_commutative,
+ "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8) + src2")
+
+# unsigned 24b multiply into 32b result uint
+binop("umul24", tint32, _2src_commutative + associative,
+ "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8)")