From 9459863dfa16071ff5088a15d853028f2865c4a7 Mon Sep 17 00:00:00 2001
From: Alok Hota
Date: Thu, 23 Aug 2018 18:42:25 -0500
Subject: [PATCH] swr/rast: partial support for Tiled Resources

- updated sample from TRTT surfaces correctly
- implemented mapped status return for TRTT surfaces
- implemented per-sample instruction minLod clamp
- updated bilinear filter weight calculation to be closer to D3D specs
- implemented "ReducedTexcoordRange" operation from D3D specs to avoid
  loss of precision on high-value normalized coordinates
---
 .../swr/rasterizer/jitter/builder_misc.cpp    | 175 ++++++++++++++++++
 .../swr/rasterizer/jitter/builder_misc.h      |  22 +++
 2 files changed, 197 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 26d8688f5e9..65eec4e4c68 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -764,6 +764,181 @@ namespace SwrJit
     /// @brief pop count on vector mask (e.g. <8 x i1>)
     Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
 
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Float / Fixed-point conversions
+    //////////////////////////////////////////////////////////////////////////
+
+    /// @brief Convert a SIMD vector of floats to signed fixed-point values
+    ///        scaled by 2^numFracBits.
+    /// @param vFloat - float vector to convert
+    /// @param numIntBits - number of integer bits; only checked by the assert
+    /// @param numFracBits - number of fractional bits in the result
+    /// @param name - name attached to the final shift instruction
+    /// @return integer vector of fixed-point values
+    Value* Builder::VCVT_F32_FIXED_SI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        Value* fixed = nullptr;
+        if constexpr (false) // This doesn't work for negative numbers!!
+        {
+            fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
+                                    C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            // Not entirely perfect for negative numbers, but close enough
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
+                            C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+
+            // Rebuild the 24-bit significand (23 stored bits plus the implicit
+            // leading one) and negate it for negative inputs.
+            Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+            vFixed           = SELECT(vSgn, NEG(vFixed), vFixed);
+
+            // Drop the sign bit, then extract and unbias the 8-bit exponent.
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            // Shift so that exactly numFracBits fractional bits remain.
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = ASHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    /// @brief Convert signed fixed-point values to a SIMD vector of floats.
+    /// @param vFixed - integer vector holding the fixed-point values
+    /// @param numIntBits - number of integer bits (sign included) in vFixed
+    /// @param numFracBits - number of fractional bits in vFixed
+    /// @param name - name attached to the generated instructions
+    /// @return float vector: integer part plus fraction / 2^numFracBits
+    Value* Builder::VCVT_FIXED_SI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Sign extend from the top bit of the fixed-point field
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = ASHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
+    /// @brief Convert a SIMD vector of floats to unsigned fixed-point values
+    ///        scaled by 2^numFracBits.
+    /// @param vFloat - float vector to convert
+    /// @param numIntBits - number of integer bits; only checked by the assert
+    /// @param numFracBits - number of fractional bits in the result
+    /// @param name - only used by the disabled bit-level path (final shift)
+    /// @return integer vector of fixed-point values
+    Value* Builder::VCVT_F32_FIXED_UI(Value*             vFloat,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        Value* fixed = nullptr;
+        if constexpr (true) // KNOB_SIM_FAST_MATH? Below works correctly from a precision
+                            // standpoint...
+        {
+            fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
+                                    C(_MM_FROUND_TO_NEAREST_INT)),
+                             mSimdInt32Ty);
+        }
+        else
+        {
+            // Do round to nearest int on fractional bits first
+            vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
+                            C(_MM_FROUND_TO_NEAREST_INT));
+            vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
+
+            // TODO: Handle INF, NAN, overflow / underflow, etc.
+
+            Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
+            Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
+            Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
+            vFixed           = OR(vFixed, VIMMED1(1 << 23));
+
+            Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
+            vExp        = SUB(vExp, VIMMED1(127));
+
+            Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
+
+            fixed = LSHR(vFixed, vExtraBits, name);
+        }
+
+        return fixed;
+    }
+
+    /// @brief Convert unsigned fixed-point values to a SIMD vector of floats.
+    /// @param vFixed - integer vector holding the fixed-point values
+    /// @param numIntBits - number of integer bits in vFixed
+    /// @param numFracBits - number of fractional bits in vFixed
+    /// @param name - name attached to the generated instructions
+    /// @return float vector: integer part plus fraction / 2^numFracBits
+    Value* Builder::VCVT_FIXED_UI_F32(Value*             vFixed,
+                                      uint32_t           numIntBits,
+                                      uint32_t           numFracBits,
+                                      const llvm::Twine& name)
+    {
+        SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
+        uint32_t extraBits = 32 - numIntBits - numFracBits;
+        if (numIntBits && extraBits)
+        {
+            // Zero extend: clear any bits above the fixed-point field. An
+            // arithmetic shift here would smear the top integer bit of an
+            // unsigned value across the upper bits and corrupt the result.
+            Value* shftAmt = VIMMED1(extraBits);
+            vFixed         = LSHR(SHL(vFixed, shftAmt), shftAmt);
+        }
+
+        Value* fVal  = VIMMED1(0.0f);
+        Value* fFrac = VIMMED1(0.0f);
+        if (numIntBits)
+        {
+            fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
+        }
+
+        if (numFracBits)
+        {
+            fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
+            fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
+        }
+
+        return FADD(fVal, fFrac, name);
+    }
+
     //////////////////////////////////////////////////////////////////////////
     /// @brief C functions called by LLVM IR
     //////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index f8701f9ba84..91e2a32f1a1 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -122,6 +122,28 @@ Value* VMASK_16(Value* mask);
 
 Value* VMOVMSK(Value* mask);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Float / Fixed-point conversions
+//////////////////////////////////////////////////////////////////////////
+// Signed
+Value* VCVT_F32_FIXED_SI(Value*             vFloat,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+Value* VCVT_FIXED_SI_F32(Value*             vFixed,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+// Unsigned
+Value* VCVT_F32_FIXED_UI(Value*             vFloat,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+Value* VCVT_FIXED_UI_F32(Value*             vFixed,
+                         uint32_t           numIntBits,
+                         uint32_t           numFracBits,
+                         const llvm::Twine& name = "");
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief functions that build IR to call x86 intrinsics directly, or
 /// emulate them with other instructions if not available on the host
-- 
2.30.2