From 4f5bad649be3914a6965bda97ca275de989bb7c0 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Thu, 7 May 2020 15:43:21 -0400
Subject: [PATCH] pan/mdg: Don't generate conversions for fp16 LUTs

We can just set the register mode appropriately and then we don't have
to care anywhere else, and there's no extra NIR to chew through. Make
sure we include sqrt too.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/panfrost/midgard/midgard_compile.c        | 21 +++++++++++++++++++
 src/panfrost/midgard/midgard_nir_algebraic.py |  6 ------
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 4b97013ebc9..643fe114b0d 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -561,6 +561,27 @@ reg_mode_for_nir(nir_alu_instr *instr)
         unsigned dst_bitsize = nir_dest_bit_size(instr->dest.dest);
         unsigned max_bitsize = MAX2(src_bitsize, dst_bitsize);
 
+        /* We don't have fp16 LUTs, so we'll want to emit code like:
+         *
+         *      vlut.fsinr hr0, hr0
+         *
+         * where both input and output are 16-bit but the operation is carried
+         * out in 32-bit
+         */
+
+        switch (instr->op) {
+        case nir_op_fsqrt:
+        case nir_op_frcp:
+        case nir_op_frsq:
+        case nir_op_fsin:
+        case nir_op_fcos:
+        case nir_op_fexp2:
+        case nir_op_flog2:
+                max_bitsize = MAX2(max_bitsize, 32);
+        default:
+                break;
+        }
+
         switch (max_bitsize) {
         case 8:
                 return midgard_reg_mode_8;
diff --git a/src/panfrost/midgard/midgard_nir_algebraic.py b/src/panfrost/midgard/midgard_nir_algebraic.py
index 8dc72bcf561..daaaafdd30e 100644
--- a/src/panfrost/midgard/midgard_nir_algebraic.py
+++ b/src/panfrost/midgard/midgard_nir_algebraic.py
@@ -93,12 +93,6 @@ for op in ('u2u', 'i2i', 'f2f', 'i2f', 'u2f', 'f2i', 'f2u'):
         srcsz *= 2
         dstsz *= 2
 
-# Midgard outputs fp32 for specials. The f2f32 will be folded in later.
-SPECIAL = ['fexp2', 'flog2', 'fsin', 'fcos', 'frcp', 'frsq']
-
-for op in SPECIAL:
-    converts += [((op + '@16', a), ('f2f16', (op, ('f2f32', a))))]
-
 # Try to force constants to the right
 constant_switch = [
         # fge gets flipped to fle, so we invert to keep the order
-- 
2.30.2