def fexp2i(exp, bits):
    """Return an expression tuple that builds the float 2.0**exp from integer ops.

    The power of two is formed directly from its bit pattern: the integer
    exponent is biased and shifted into the exponent field, leaving the
    mantissa bits zero.

    Args:
        exp: expression (or variable name) holding the integer exponent.
            It is assumed to already be in the range representable by a
            normal float of the requested size; no clamping is done here.
        bits: bit size of the float to build; 32 and 64 are supported.

    Returns:
        A nested tuple expression in the search/replace pattern syntax.

    Raises:
        ValueError: if ``bits`` is neither 32 nor 64.
    """
    if bits == 32:
        # Bias by 127 and move the exponent above the 23 mantissa bits.
        return ('ishl', ('iadd', exp, 127), 23)
    if bits == 64:
        # Bias by 1023; the exponent field starts at bit 20 of the second
        # 32-bit word handed to pack_64_2x32_split, the first word is zero.
        return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))

    # An explicit raise (rather than "assert False") keeps this check alive
    # when Python runs with -O, instead of silently returning None.
    raise ValueError('fexp2i: unsupported bit size %r' % (bits,))
+
def ldexp(f, exp, bits):
    """Return an expression tuple computing f * 2.0**exp with float multiplies.

    Args:
        f: expression (or variable name) for the float operand.
        exp: expression (or variable name) for the integer exponent.
        bits: bit size of the float operand; 32 and 64 are supported.

    Returns:
        A nested tuple expression: f multiplied by two powers of two whose
        exponents add up to the clamped ``exp``.

    Raises:
        ValueError: if ``bits`` is neither 32 nor 64.
    """
    # First, we clamp exp to a reasonable range.  The maximum possible range
    # for a normal exponent is [-126, 127] and, throwing in denormals, you get
    # a maximum range of [-149, 127].  This means that we can potentially have
    # a swing of +-276.  If you start with FLT_MAX, you actually have to do
    # ldexp(FLT_MAX, -278) to get it to flush all the way to zero.  The GLSL
    # spec, on the other hand, only requires that we handle an exponent value
    # in the range [-126, 128].  This implementation is *mostly* correct; it
    # handles a range on exp of [-252, 254] which allows you to create any
    # value (including denorms if the hardware supports it) and to adjust the
    # exponent of any normal value to anything you want.
    if bits == 32:
        exp = ('imin', ('imax', exp, -252), 254)
    elif bits == 64:
        exp = ('imin', ('imax', exp, -2044), 2046)
    else:
        # An explicit raise (rather than "assert False") keeps this check
        # alive when Python runs with -O, instead of continuing unclamped.
        raise ValueError('ldexp: unsupported bit size %r' % (bits,))

    # Now we compute two powers of 2, one for exp/2 and one for exp-exp/2.
    # (We use ishr which isn't the same as division for -1, but the -1 case
    # still works since we use exp-exp/2 as the second exponent.)  While the
    # spec technically defines ldexp as f * 2.0^exp, simply multiplying once
    # doesn't work with denormals and doesn't allow for the full swing in
    # exponents that you can get with normalized values.  Instead, we create
    # two powers of two and multiply by them each in turn.  That way the
    # effective range of our exponent is doubled.
    half_exp = ('ishr', exp, 1)
    pow2_1 = fexp2i(half_exp, bits)
    pow2_2 = fexp2i(('isub', exp, half_exp), bits)
    return ('fmul', ('fmul', f, pow2_1), pow2_2)
+
# Rewrite ldexp at each supported float size into the multiply sequence
# produced by ldexp() for that size.
optimizations += [
    (('ldexp@%d' % width, 'x', 'exp'), ldexp('x', 'exp', width))
    for width in (32, 64)
]
+
+# Unreal Engine 4 demo applications open-codes bitfieldReverse()
+def bitfield_reverse(u):
+ step1 = ('ior', ('ishl', u, 16), ('ushr', u, 16))
+ step2 = ('ior', ('ishl', ('iand', step1, 0x00ff00ff), 8), ('ushr', ('iand', step1, 0xff00ff00), 8))
+ step3 = ('ior', ('ishl', ('iand', step2, 0x0f0f0f0f), 4), ('ushr', ('iand', step2, 0xf0f0f0f0), 4))
+ step4 = ('ior', ('ishl', ('iand', step3, 0x33333333), 2), ('ushr', ('iand', step3, 0xcccccccc), 2))
+ step5 = ('ior', ('ishl', ('iand', step4, 0x55555555), 1), ('ushr', ('iand', step4, 0xaaaaaaaa), 1))
+
+ return step5
+
# Recognize the open-coded reversal sequence on a 32-bit value and replace
# it with a single bitfield_reverse operation.
optimizations += [
    (bitfield_reverse('x@32'), ('bitfield_reverse', 'x')),
]
+
# For any float comparison "cmp", the "a == a" in "a == a && a cmp b" is
# redundant: "a == a" only tests that a is not NaN, and if a is NaN the
# second comparison fails anyway.  The same holds with the comparison's
# operands swapped, so both orders are registered for each operator.
for op in ('flt', 'fge', 'feq'):
    optimizations += [
        (('iand', ('feq', a, a), cmp_expr), cmp_expr)
        for cmp_expr in ((op, a, b), (op, b, a))
    ]
+