From: Erico Nunes <nunes.erico@gmail.com>
Date: Sat, 27 Jul 2019 15:58:53 +0000 (+0200)
Subject: nir/algebraic: add new fsum ops and fdot lowering
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4a407df68297b434c4489e6d28c2dd18e9eec326;p=mesa.git

nir/algebraic: add new fsum ops and fdot lowering

The Mali400 pp doesn't implement fdot but has fsum3 and fsum4, which can
be used to optimize fdot lowering. fsum2 is not implemented and can be
further lowered to an add of the two vector components.
Currently lima ppir handles this lowering internally; however, this
happens at a very late stage and requires a big chunk of code compared
to a nir_opt_algebraic lowering.
By having fsum in nir, we can reduce ppir complexity and enable the
lowered ops to be part of other nir optimizations in the optimization
loop.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a3c44ff988e..088b8d7d5e8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2556,6 +2556,9 @@ typedef struct nir_shader_compiler_options {
    /* lower fdph to fdot4 */
    bool lower_fdph;
 
+   /** lower fdot to fmul and fsum/fadd. */
+   bool lower_fdot;
+
    /* Does the native fdot instruction replicate its result for four
     * components?  If so, then opt_algebraic_late will turn all fdotN
     * instructions into fdot_replicatedN instructions.
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 7854faec15f..3020da98264 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -439,6 +439,8 @@ if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
 if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
 """)
 
+# Sum of vector components
+unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
 
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type],
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 351f467b246..9fc3be8f8b0 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -194,6 +194,12 @@ optimizations = [
    (('fdot2', ('vec2', a, 0.0), b), ('fmul', a, b)),
    (('fdot2', a, 1.0), ('fadd', 'a.x', 'a.y')),
 
+   # Lower fdot to fmul + fsum when lower_fdot is set (fsum2 is further lowered to fadd)
+   (('fdot2', a, b), ('fsum2', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot3', a, b), ('fsum3', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fdot4', a, b), ('fsum4', ('fmul', a, b)), 'options->lower_fdot'),
+   (('fsum2', a), ('fadd', 'a.x', 'a.y'), 'options->lower_fdot'),
+
    # If x >= 0 and x <= 1: fsat(1 - x) == 1 - fsat(x) trivially
    # If x < 0: 1 - fsat(x) => 1 - 0 => 1 and fsat(1 - x) => fsat(> 1) => 1
    # If x > 1: 1 - fsat(x) => 1 - 1 => 0 and fsat(1 - x) => fsat(< 0) => 0