From 3cbcb1b73e5f764ed87fdcd1dea8a921e73bfd82 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 24 Mar 2020 19:54:06 +0100 Subject: [PATCH] gallium/util: Add back (and rename) util_float_to_half implementation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This implementation was removed by 8b8af6d3 ("gallium/util: Switch util_float_to_half to _mesa_float_to_half()'s impl."). It was not actually broken, but _mesa_float_to_half() implements round-to-nearest-even, whereas util_float_to_half() implemented round-to-zero. So rename it appropriately. GL actually never cares about rounding (except for a broken piglit test); however, d3d10 very much does and requires RTZ for float to half conversion. Moreover, apparently at least radeon gpus actually always do RTZ when doing RT writes (and I'd suspect for shader image writes as well). Hence it seems appropriate to hook up this rtz function to the format instead. This will cause llvmpipe and softpipe to use rtz rounding for clears with half float formats, and softpipe would use rtz behavior for rt writes as well (llvmpipe has that hardcoded); I am not sure whether "real" hw drivers hit this function much. (For shader opcodes we would still need to figure out which rounding is appropriate, but that is a question for another day.) Note: this should probably be unified with _mesa_float_to_float16_rtz. It is unclear at this point which one is better, so just restore the previous function here. 
Reviewed-by: Marek Olšák Tested-by: Marge Bot Part-of: --- src/gallium/auxiliary/util/u_half.h | 57 +++++++++++++++++++++++++++++ src/util/format/u_format_pack.py | 2 +- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h index a107dcb74b1..bbcc843c310 100644 --- a/src/gallium/auxiliary/util/u_half.h +++ b/src/gallium/auxiliary/util/u_half.h @@ -50,6 +50,63 @@ util_float_to_half(float f) return _mesa_float_to_half(f); } +static inline uint16_t +util_float_to_half_rtz(float f) +{ + uint32_t sign_mask = 0x80000000; + uint32_t round_mask = ~0xfff; + uint32_t f32inf = 0xff << 23; + uint32_t f16inf = 0x1f << 23; + uint32_t sign; + union fi magic; + union fi f32; + uint16_t f16; + + magic.ui = 0xf << 23; + + f32.f = f; + + /* Overview: converts f into a 16-bit half-float bit pattern and returns it. Sign: save the sign bit (bit 31) and clear it, so everything below works on the magnitude only. */ + sign = f32.ui & sign_mask; + f32.ui ^= sign; + + if (f32.ui == f32inf) { + /* Inf: the magnitude bits equal the f32 infinity pattern, so emit the f16 infinity pattern. */ + f16 = 0x7c00; + } else if (f32.ui > f32inf) { + /* NaN: anything above the f32 infinity pattern; always emits the fixed pattern 0x7e00, so the input payload is not preserved. */ + f16 = 0x7e00; + } else { + /* Number (zero, denormal or normal). Assuming union fi overlays an IEEE-754 binary32 f with a uint32_t ui (TODO confirm, it is declared elsewhere): clear the low 12 mantissa bits, multiply by magic (exponent field 0xf, i.e. 2^-112) to rebias the exponent for f16, then subtract round_mask (0xfffff000), which wraps around to adding 0x1000. NOTE(review): that 0x1000 bias carries into bit 13 whenever bit 12 is set, e.g. 0x3f801000 (1 + 2^-11) ends up as 0x3c01 instead of 0x3c00, so in-range values round half away from zero rather than toward zero as the _rtz name says - confirm which rounding is intended. */ + f32.ui &= round_mask; + f32.f *= magic.f; + f32.ui -= round_mask; + /* + * XXX: The magic mul relies on denorms being available, otherwise + * all f16 denorms get flushed to zero - hence when this is used + * for tgsi_exec in softpipe we won't get f16 denorms. + */ + /* + * Clamp to max finite value if overflowed. NOTE(review): the test below is a strict greater-than, so a result that lands exactly on f16inf (f32 inputs 0x477ff000..0x477fffff, i.e. [65520, 65536), after the 0x1000 bias) is not clamped and comes out as 0x7c00 (infinity), while larger inputs clamp to 0x7bff - this looks like it should be a greater-or-equal test, confirm. + * OpenGL has completely undefined rounding behavior for float to + * half-float conversions, and this matches what is mandated for float + * to fp11/fp10, which recommend round-to-nearest-finite too. + * (d3d10 is deeply unhappy about flushing such values to infinity, and + * while it also mandates round-to-zero it doesn't care nearly as much + * about that.) 
+ */ + if (f32.ui > f16inf) + f32.ui = f16inf - 1; + + f16 = f32.ui >> 13; + } + + /* Sign: move the saved sign from bit 31 down to bit 15 and merge it into the result. */ + f16 |= sign >> 16; + + return f16; +} + static inline float util_half_to_float(uint16_t f16) { diff --git a/src/util/format/u_format_pack.py b/src/util/format/u_format_pack.py index 5a1763d1e6f..c8749a2c047 100644 --- a/src/util/format/u_format_pack.py +++ b/src/util/format/u_format_pack.py @@ -437,7 +437,7 @@ def conversion_expr(src_channel, src_size = 32 if dst_channel.size == 16: - value = 'util_float_to_half(%s)' % value + value = 'util_float_to_half_rtz(%s)' % value elif dst_channel.size == 64 and src_size < 64: value = '(double)%s' % value -- 2.30.2