return unpackUint2x32(a).y & 0x80000000u;
}
-/* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less
- * than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise,
- * returns false.
+/* Returns true if the signed 64-bit value formed by concatenating `a0' and
+ * `a1' is less than the signed 64-bit value formed by concatenating `b0' and
+ * `b1'. Otherwise, returns false.
*/
bool
-lt64(uint a0, uint a1, uint b0, uint b1)
+ilt64(uint a0, uint a1, uint b0, uint b1)
{
- return (a0 < b0) || ((a0 == b0) && (a1 < b1));
+ return (int(a0) < int(b0)) || ((a0 == b0) && (a1 < b1));
}
bool
{
uvec2 a = unpackUint2x32(__a);
uvec2 b = unpackUint2x32(__b);
- uint aSign = __extractFloat64Sign(__a);
- uint bSign = __extractFloat64Sign(__b);
- if (aSign != bSign)
- return (aSign != 0u) && ((((a.y | b.y)<<1) | a.x | b.x) != 0u);
- return mix(lt64(a.y, a.x, b.y, b.x), lt64(b.y, b.x, a.y, a.x), aSign != 0u);
+ /* IEEE 754 floating point numbers are specifically designed so that, with
+ * two exceptions, values can be compared by bit-casting to signed integers
+ * with the same number of bits.
+ *
+ * From https://en.wikipedia.org/wiki/IEEE_754-1985#Comparing_floating-point_numbers:
+ *
+ * When comparing as 2's-complement integers: If the sign bits differ,
+ * the negative number precedes the positive number, so 2's complement
+ * gives the correct result (except that negative zero and positive zero
+ * should be considered equal). If both values are positive, the 2's
+ * complement comparison again gives the correct result. Otherwise (two
+ * negative numbers), the correct FP ordering is the opposite of the 2's
+ * complement ordering.
+ *
+ * The logic implied by the above quotation is:
+ *
+ * !both_are_zero(a, b) && (both_negative(a, b) ? a > b : a < b)
+ *
+ * This is equivalent to
+ *
+ * fne(a, b) && (both_negative(a, b) ? a >= b : a < b)
+ *
+ * fne(a, b) && (both_negative(a, b) ? !(a < b) : a < b)
+ *
+ * fne(a, b) && ((both_negative(a, b) && !(a < b)) ||
+ * (!both_negative(a, b) && (a < b)))
+ *
+ * (A && !B) || (!A && B) is (A xor B), which is implemented here using !=.
+ *
+ * fne(a, b) && (both_negative(a, b) != (a < b))
+ */
+ bool lt = ilt64(a.y, a.x, b.y, b.x);
+ bool both_negative = (a.y & b.y & 0x80000000u) != 0;
+
+ return !__feq64_nonnan(__a, __b) && (lt != both_negative);
}
/* Returns true if the double-precision floating-point value `a' is less than
bool
__flt64(uint64_t a, uint64_t b)
{
- if (__is_nan(a) || __is_nan(b))
- return false;
+ /* This weird layout matters. Doing the "obvious" thing results in extra
+ * flow control being inserted to implement the short-circuit evaluation
+ * rules. Flow control is bad!
+ */
+ bool x = !__is_nan(a);
+ bool y = !__is_nan(b);
+ bool z = __flt64_nonnan(a, b);
- return __flt64_nonnan(a, b);
+ return (x && y && z);
}
/* Returns true if the double-precision floating-point value `a' is greater
bool
__fge64(uint64_t a, uint64_t b)
{
- if (__is_nan(a) || __is_nan(b))
- return false;
+ /* This weird layout matters. Doing the "obvious" thing results in extra
+ * flow control being inserted to implement the short-circuit evaluation
+ * rules. Flow control is bad!
+ */
+ bool x = !__is_nan(a);
+ bool y = !__is_nan(b);
+ bool z = !__flt64_nonnan(a, b);
- return !__flt64_nonnan(a, b);
+ return (x && y && z);
}
uint64_t
(('iand', ('ieq', 'a@32', 0), ('ieq', 'b@32', 0)), ('ieq', ('ior', a, b), 0), '!options->lower_bitops'),
(('ior', ('ine', 'a@32', 0), ('ine', 'b@32', 0)), ('ine', ('ior', a, b), 0), '!options->lower_bitops'),
+ # This pattern occurs courtesy of __flt64_nonnan in the soft-fp64 code.
+ # The first part of the iand comes from the !__feq64_nonnan.
+ #
+ # The second pattern is a reformulation of the first based on the relation
+ # (a == 0 || y == 0) <=> umin(a, y) == 0, where b in the first equation
+ # happens to be y == 0.
+ (('iand', ('inot', ('iand', ('ior', ('ieq', a, 0), b), c)), ('ilt', a, 0)),
+ ('iand', ('inot', ('iand', b , c)), ('ilt', a, 0))),
+ (('iand', ('inot', ('iand', ('ieq', ('umin', a, b), 0), c)), ('ilt', a, 0)),
+ ('iand', ('inot', ('iand', ('ieq', b , 0), c)), ('ilt', a, 0))),
+
# These patterns can result when (a < b || a < c) => (a < min(b, c))
# transformations occur before constant propagation and loop-unrolling.
(('~flt', a, ('fmax', b, a)), ('flt', a, b)),