(('flrp', a, a, b), a),
(('flrp', 0.0, a, b), ('fmul', a, b)),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+ (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
(('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
(('inot', ('fge', a, b)), ('flt', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
- (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+ (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
# Emulating booleans
(('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
(('fpow', a, 1.0), a),
(('fpow', a, 2.0), ('fmul', a, a)),
(('fpow', 2.0, a), ('fexp2', a)),
+ (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+ (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+ (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+ (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+ (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+ (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+ (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+ (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+ (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+ (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
# Division and reciprocal
(('fdiv', 1.0, a), ('frcp', a)),
(('frcp', ('frcp', a)), a),
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
]
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished. Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [  # every rule here rewrites a comparison of (a + b) against 0.0 as a comparison of a against -b
+ (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),  # (a + b) <  0.0  ->  a <  -b
+ (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),  # (a + b) >= 0.0  ->  a >= -b
+ (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),  # (a + b) == 0.0  ->  a == -b
+ (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),  # (a + b) != 0.0  ->  a != -b
+]
+
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+ late_optimizations).render()  # second pass, built from late_optimizations; its rendered text is printed to stdout after the first