(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
(('inot', ('fge', a, b)), ('flt', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
- (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
# Emulating booleans
(('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
(('iand', 'a@bool', 1.0), ('b2f', a)),
+ (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+ (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
(('fpow', a, 1.0), a),
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
# Division and reciprocal
(('fdiv', 1.0, a), ('frcp', a)),
(('frcp', ('frcp', a)), a),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
]
# Add optimizations to handle the case where the result of a ternary is
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()