/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
bool lower_scmp;
+ /* Does the native fdot instruction replicate its result across all four
+ * components? If so, then opt_algebraic_late will turn all fdotN
+ * instructions into fdot_replicatedN instructions.
+ */
+ bool fdot_replicates;
+
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
* are simulated by floats.)
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
"{src}")
+binop_reduce("fdot_replicated", 4, tfloat, tfloat,
+ "{src0} * {src1}", "{src0} + {src1}", "{src}")
+
binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
(('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
(('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
(('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+ (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+ (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
]
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()