(ishr, a, ('imin', ('iadd', ('iand', b, mask), ('iand', c, mask)), s - 1))),
])
+# Optimize a pattern of address calculation created by DXVK where the offset is
+# divided by 4 and then multiplied by 4. This can be turned into an iand and the
+# additions before can be reassociated to CSE the iand instruction.
+for log2 in range(1, 7): # powers of two from 2 to 64
+ v = 1 << log2
+ # 32-bit constant with the low `log2` bits cleared, i.e. ~(v - 1) truncated to 32 bits.
+ mask = 0xffffffff & ~(v - 1)
+ # Pattern variable 'b': a constant ('#') filtered by the is_unsigned_multiple_of_<v>
+ # predicate (presumably true only when every component is a multiple of v -- confirm
+ # against the predicate's definition).
+ b_is_multiple = '#b(is_unsigned_multiple_of_{})'.format(v)
+
+ optimizations.extend([
+ # 'a >> #b << #b' -> 'a & ~((1 << #b) - 1)'
+ (('ishl@32', ('ushr@32', a, log2), log2), ('iand', a, mask)),
+
+ # Reassociate for improved CSE
+ # If b has its low `log2` bits clear, adding it neither changes the low bits of
+ # 'a' nor receives a carry from them, so masking before or after the add is the same.
+ (('iand@32', ('iadd@32', a, b_is_multiple), mask), ('iadd', ('iand', a, mask), b)),
+ ])
+
optimizations.extend([
# This is common for address calculations. Reassociating may enable the
# 'a<<c' to be CSE'd. It also helps architectures that have an ISHLADD
(('umin', ('umin', a, b), b), ('umin', a, b)),
(('imin', ('imin', a, b), b), ('imin', a, b)),
(('iand@32', a, ('inot', ('ishr', a, 31))), ('imax', a, 0)),
- (('fmax', a, ('fneg', a)), ('fabs', a)),
- (('imax', a, ('ineg', a)), ('iabs', a)),
(('fmin', a, ('fneg', a)), ('fneg', ('fabs', a))),
(('imin', a, ('ineg', a)), ('ineg', ('iabs', a))),
(('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))),
(('unpack_half_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_half_2x16_split_y', a)),
(('unpack_32_2x16_split_y', ('iand', a, 0xffff0000)), ('unpack_32_2x16_split_y', a)),
(('unpack_64_2x32_split_y', ('iand', a, 0xffffffff00000000)), ('unpack_64_2x32_split_y', a)),
+
+ # Optimize half packing
+ # pack_half_2x16(vec2(x, y)) places half(x) in bits 0..15 and half(y) in bits 16..31.
+ (('ishl', ('pack_half_2x16', ('vec2', a, 0)), 16), ('pack_half_2x16', ('vec2', 0, a))),
+ # This must match the logical shift (ushr), not the arithmetic one (ishr): when 'a'
+ # is negative, bit 31 of the packed word is set and ishr would sign-extend into the
+ # upper 16 bits, while pack_half_2x16(vec2(a, 0)) always has zero upper bits.
+ (('ushr', ('pack_half_2x16', ('vec2', 0, a)), 16), ('pack_half_2x16', ('vec2', a, 0))),
+
+ # The two operands occupy disjoint 16-bit halves, so iadd and ior both just merge them.
+ (('iadd', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
+ ('pack_half_2x16', ('vec2', a, b))),
+ (('ior', ('pack_half_2x16', ('vec2', a, 0)), ('pack_half_2x16', ('vec2', 0, b))),
+ ('pack_half_2x16', ('vec2', a, b))),
])
# After the ('extract_u8', a, 0) pattern, above, triggers, there will be
('bcsel', ('ilt', a, ('isub', a, b)), intmin, ('isub', a, b))), 'options->lower_add_sat'),
]
-invert = OrderedDict([('feq', 'fne'), ('fne', 'feq'), ('fge', 'flt'), ('flt', 'fge')])
+invert = OrderedDict([('feq', 'fne'), ('fne', 'feq')])
for left, right in itertools.combinations_with_replacement(invert.keys(), 2):
optimizations.append((('inot', ('ior(is_used_once)', (left, a, b), (right, c, d))),