From ad66b25415745383aa9380975f16967bfb1022a8 Mon Sep 17 00:00:00 2001 From: Krzysztof Raszkowski Date: Fri, 6 Mar 2020 16:09:24 +0100 Subject: [PATCH] gallium/swr: Fix vcvtph2ps llvm intrinsic compile error Reviewed-by: Jan Zielinski Tested-by: Marge Bot Part-of: --- .../rasterizer/codegen/gen_llvm_ir_macros.py | 1 - .../swr/rasterizer/jitter/builder_misc.cpp | 34 +++---------------- .../jitter/functionpasses/lower_x86.cpp | 3 -- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py index 656a7fa3cd3..b35a2a4ae24 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py @@ -53,7 +53,6 @@ intrinsics = [ ['VPERMD', ['a', 'idx'], 'a'], ['VPERMPS', ['idx', 'a'], 'a'], ['VCVTPD2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'], - ['VCVTPH2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'], ['VCVTPS2PH', ['a', 'round'], 'mSimdInt16Ty'], ['VHSUBPS', ['a', 'b'], 'a'], ['VPTESTC', ['a', 'b'], 'mInt32Ty'], diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 4f6b0b46a60..96175692259 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -621,37 +621,11 @@ namespace SwrJit /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format. 
Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name) { - if (JM()->mArch.F16C()) - { - return VCVTPH2PS(a, name); - } - else - { - FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty); - Function* pCvtPh2Ps = cast<Function>( -#if LLVM_VERSION_MAJOR >= 9 - JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy).getCallee()); -#else - JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy)); -#endif + // Bitcast Nxint16 to Nxhalf + uint32_t numElems = a->getType()->getVectorNumElements(); + Value* input = BITCAST(a, VectorType::get(mFP16Ty, numElems)); - if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr) - { - sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", - (void*)&ConvertFloat16ToFloat32); - } - - Value* pResult = UndefValue::get(mSimdFP32Ty); - for (uint32_t i = 0; i < mVWidth; ++i) - { - Value* pSrc = VEXTRACT(a, C(i)); - Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc}); - pResult = VINSERT(pResult, pConv, C(i)); - } - - pResult->setName(name); - return pResult; - } + return FP_EXT(input, VectorType::get(mFP32Ty, numElems), name); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp index 4eb0162d5d9..3c728084afe 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp @@ -127,7 +127,6 @@ namespace SwrJit {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, - {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, 
Intrinsic::not_intrinsic}, NO_EMU}}, {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, }, @@ -141,7 +140,6 @@ namespace SwrJit {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}}, {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}}, {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}}, - {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}}, {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}}, {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}}, }, @@ -164,7 +162,6 @@ namespace SwrJit #else {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}}, #endif - {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, NO_EMU}}, {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}}, {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}, }}; -- 2.30.2