From 942341bcd0128fb9d9caf68b33f603855e2f6d69 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 21 Jan 2018 12:31:51 -0500 Subject: [PATCH] freedreno/ir3: don't lower fsat Instead, if possible fold (sat) flag into src, otherwise use: (sat)max.f rD, rS, rS Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 2 ++ .../drivers/freedreno/ir3/ir3_compiler_nir.c | 21 +++++++++++++++++++ src/gallium/drivers/freedreno/ir3/ir3_nir.c | 1 - 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 210df20b812..84235cc1cd2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -616,6 +616,8 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr) break; case OPC_ABSNEG_F: case OPC_ABSNEG_S: + if (instr->flags & IR3_INSTR_SAT) + return false; break; default: return false; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 009dcb30fa5..72e0f4fe288 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -964,6 +964,27 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) case nir_op_fmin: dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0); break; + case nir_op_fsat: + /* if there is just a single use of the src, and it supports + * (sat) bit, we can just fold the (sat) flag back to the + * src instruction and create a mov. This is easier for cp + * to eliminate. + * + * TODO probably opc_cat==4 is ok too + */ + if (alu->src[0].src.is_ssa && + (list_length(&alu->src[0].src.ssa->uses) == 1) && + ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) { + src[0]->flags |= IR3_INSTR_SAT; + dst[0] = ir3_MOV(b, src[0], TYPE_U32); + } else { + /* otherwise generate a max.f that saturates.. blob does + * similar (generating a cat2 mov using max.f) + */ + dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0); + dst[0]->flags |= IR3_INSTR_SAT; + } + break; case nir_op_fmul: dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0); break; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index 2393306e19c..81a46be914e 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -37,7 +37,6 @@ static const nir_shader_compiler_options options = { .lower_fpow = true, - .lower_fsat = true, .lower_scmp = true, .lower_flrp32 = true, .lower_flrp64 = true, -- 2.30.2