From dc16086e3b616f767ab31c86505502c4df7739ac Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 2 Feb 2016 03:36:19 +0100 Subject: [PATCH] tgsi: add PK2H/UP2H support The util functions handle the half-float conversion. Note that piglit won't like it much due to: a) The util functions use magic float mul conversion but when run inside softpipe/llvmpipe, denorms are flushed to zero, therefore when the conversion is from/to f16 denorm the result will be zero. This is a bug which should be fixed in these functions (should not rely on denorms being available), but will happen elsewhere just the same (e.g. conversion to f16 render targets). b) The util functions use trunc round mode rather than round-to-nearest. This is NOT a bug (as it is a d3d10 requirement). This will result of rounding not representable finite values to MAX_F16 rather than INFINITY. My belief is the piglit tests are wrong here but it's difficult to tell (generally glsl rounding mode is undefined, however I'm not sure if rounding mode might need to be consistent for different operations). Nevertheless, for gl it would be better to use round-to-nearest, but using different rounding for GL and d3d10 is an unsolved problem (as it affects things like conversion to f16 render targets, clear colors, this shader opcode). Hence for now don't enable the cap bit (so the code is unused). (Code is from imirkin, comment from sroland) Signed-off-by: Ilia Mirkin Reviewed-by: Roland Scheidegger Reviewed-by: Brian Paul --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 44 ++++++++++++++++++++++++-- src/gallium/auxiliary/util/u_half.h | 7 +++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f67c16200a9..d898fd66f48 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -58,6 +58,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_half.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -3057,6 +3058,45 @@ exec_dp2(struct tgsi_exec_machine *mach, } } +static void +exec_pk2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg[2], dst; + + fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst.u[chan] = util_float_to_half(arg[0].f[chan]) | + (util_float_to_half(arg[1].f[chan]) << 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); + } + } +} + +static void +exec_up2h(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned chan; + union tgsi_exec_channel arg, dst[2]; + + fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); + for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { + dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); + dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); + } + } +} + static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) @@ -4339,7 +4379,7 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + exec_pk2h(mach, inst); break; case TGSI_OPCODE_PK2US: @@ -4425,7 +4465,7 @@ exec_instruction( break; case TGSI_OPCODE_UP2H: - assert (0); + exec_up2h(mach, inst); break; case TGSI_OPCODE_UP2US: diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h index d28fae3c77d..966d213bdd5 100644 --- a/src/gallium/auxiliary/util/u_half.h +++ b/src/gallium/auxiliary/util/u_half.h @@ -74,7 +74,11 @@ util_float_to_half(float f) f32.ui &= round_mask; f32.f *= magic.f; f32.ui -= round_mask; - + /* + * XXX: The magic mul relies on denorms being available, otherwise + * all f16 denorms get flushed to zero - hence when this is used + * for tgsi_exec in softpipe we won't get f16 denorms. + */ /* * Clamp to max finite value if overflowed. * OpenGL has completely undefined rounding behavior for float to @@ -112,6 +116,7 @@ util_half_to_float(uint16_t f16) /* Adjust */ f32.f *= magic.f; + /* XXX: The magic mul relies on denorms being available */ /* Inf / NaN */ if (f32.f >= infnan.f) -- 2.30.2