From 548da20b22d43285fd919a4eaab8ef549b36b91e Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Thu, 11 Jul 2019 17:06:31 -0500
Subject: [PATCH] nir/lower_doubles: Handle fdiv and fsub directly

Reviewed-by: Eric Anholt
---
 src/compiler/nir/nir.h                  |  4 +++-
 src/compiler/nir/nir_lower_double_ops.c | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 6805d067a80..99fa4baa16a 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2476,7 +2476,9 @@ typedef enum {
    nir_lower_dfract = (1 << 6),
    nir_lower_dround_even = (1 << 7),
    nir_lower_dmod = (1 << 8),
-   nir_lower_fp64_full_software = (1 << 9),
+   nir_lower_dsub = (1 << 9),
+   nir_lower_ddiv = (1 << 10),
+   nir_lower_fp64_full_software = (1 << 11),
 } nir_lower_doubles_options;
 
 typedef struct nir_shader_compiler_options {
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index 211c9359558..45ac155de51 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -600,6 +600,8 @@ nir_lower_doubles_op_to_options_mask(nir_op opcode)
    case nir_op_ffract: return nir_lower_dfract;
    case nir_op_fround_even: return nir_lower_dround_even;
    case nir_op_fmod: return nir_lower_dmod;
+   case nir_op_fsub: return nir_lower_dsub;
+   case nir_op_fdiv: return nir_lower_ddiv;
    default: return 0;
    }
 }
@@ -673,10 +675,21 @@ lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
    case nir_op_fround_even:
       return lower_round_even(b, src);
 
+   case nir_op_fdiv:
+   case nir_op_fsub:
    case nir_op_fmod: {
       nir_ssa_def *src1 = nir_mov_alu(b, alu->src[1],
                                       alu->dest.dest.ssa.num_components);
-      return lower_mod(b, src, src1);
+      switch (alu->op) {
+      case nir_op_fdiv:
+         return nir_fmul(b, src, nir_frcp(b, src1));
+      case nir_op_fsub:
+         return nir_fadd(b, src, nir_fneg(b, src1));
+      case nir_op_fmod:
+         return lower_mod(b, src, src1);
+      default:
+         unreachable("unhandled opcode");
+      }
    }
    default:
       unreachable("unhandled opcode");
-- 
2.30.2