For 16-bit special (LUT) ops, we can just set the register mode to 32-bit
and then we don't have to care about them anywhere else, and there's no
extra NIR (the f2f32/f2f16 conversions) to chew through. Make sure we
include sqrt too.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5151>
unsigned dst_bitsize = nir_dest_bit_size(instr->dest.dest);
unsigned max_bitsize = MAX2(src_bitsize, dst_bitsize);
+ /* We don't have fp16 LUTs, so we'll want to emit code like:
+ *
+ * vlut.fsinr hr0, hr0
+ *
+ * where both input and output are 16-bit but the operation is carried
+ * out in 32-bit
+ */
+
+ switch (instr->op) {
+ case nir_op_fsqrt:
+ case nir_op_frcp:
+ case nir_op_frsq:
+ case nir_op_fsin:
+ case nir_op_fcos:
+ case nir_op_fexp2:
+ case nir_op_flog2:
+ max_bitsize = MAX2(max_bitsize, 32);
+ default:
+ break;
+ }
+
switch (max_bitsize) {
case 8:
return midgard_reg_mode_8;
srcsz *= 2
dstsz *= 2
-# Midgard outputs fp32 for specials. The f2f32 will be folded in later.
-SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos', 'frcp', 'frsq']
-
-for op in SPECIAL:
- converts += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]
-
# Try to force constants to the right
constant_switch = [
# fge gets flipped to fle, so we invert to keep the order