From db07b46f2cb89d96a17a28a0453a236451b560c7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= Date: Tue, 26 Apr 2016 09:35:30 +0200 Subject: [PATCH] nir: Add lrp lowering for doubles in opt_algebraic MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Some hardware (i965 on Broadwell generation, for example) does not support natively the execution of lrp instruction with double arguments. Add 'lower_flrp64' flag to lower this instruction in that case. v2: - Rename lower_flrp_double to lower_flrp64 (Jason) - Fix typo (Jason) - Adapt the code to define bit_size information in the opcodes. Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_opt_algebraic.py | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d5eda02177a..e8899640d23 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1625,6 +1625,8 @@ typedef struct nir_shader_compiler_options { bool lower_fdiv; bool lower_ffma; bool lower_flrp32; + /** Lowers flrp when it does not support doubles */ + bool lower_flrp64; bool lower_fpow; bool lower_fsat; bool lower_fsqrt; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 082bcc126ff..60ee170b43f 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -98,12 +98,15 @@ optimizations = [ (('~flrp', a, a, b), a), (('~flrp', 0.0, a, b), ('fmul', a, b)), (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp32'), - (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp32'), + (('flrp@32', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp32'), + (('flrp@64', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp64'), (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'), - (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'), + (('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'), + (('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'), (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'), - (('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'), + (('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'), + (('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications -- 2.30.2