['VPERMD', ['a', 'idx'], 'a'],
['VPERMPS', ['idx', 'a'], 'a'],
['VCVTPD2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'],
- ['VCVTPH2PS', ['a'], 'VectorType::get(mFP32Ty, a->getType()->getVectorNumElements())'],
['VCVTPS2PH', ['a', 'round'], 'mSimdInt16Ty'],
['VHSUBPS', ['a', 'b'], 'a'],
['VPTESTC', ['a', 'b'], 'mInt32Ty'],
/// @brief Convert float16 values, carried in an int16 vector, to float32.
/// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
///            The plus-prefixed lines read the element count from the type of a
///            instead of assuming a fixed width.
/// @param name - name passed on to the resulting value.
/// @return The converted value. The plus-prefixed lines return the result of
///         FP_EXT to a vector of mFP32Ty with the same element count as a.
/// NOTE(review): this span is in diff form. Minus-prefixed lines are the
/// implementation being removed and plus-prefixed lines are its replacement;
/// only one of the two sets is expected to exist in the compiled file.
Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
{
// Removed path, case 1: when the JIT manager reports F16C support, forward
// directly to the VCVTPH2PS wrapper.
- if (JM()->mArch.F16C())
- {
- return VCVTPH2PS(a, name);
- }
- else
- {
// Removed path, case 2: declare (or look up) ConvertFloat16ToFloat32 in the
// current module with signature mFP32Ty(mInt16Ty). The LLVM_VERSION_MAJOR
// check exists because from LLVM 9 on the result of getOrInsertFunction needs
// getCallee() before the cast to Function.
- FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty);
- Function* pCvtPh2Ps = cast<Function>(
-#if LLVM_VERSION_MAJOR >= 9
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy).getCallee());
-#else
- JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
-#endif
+ // Bitcast Nxint16 to Nxhalf
+ uint32_t numElems = a->getType()->getVectorNumElements();
+ Value* input = BITCAST(a, VectorType::get(mFP16Ty, numElems));
// Removed path: register the address of the host-side ConvertFloat16ToFloat32
// only if the symbol lookup does not already find it.
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr)
- {
- sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32",
- (void*)&ConvertFloat16ToFloat32);
- }
-
// Removed path: build the result one element at a time over mVWidth lanes,
// extracting each source element, calling the scalar helper, and inserting
// the converted element into a vector that starts out undefined.
- Value* pResult = UndefValue::get(mSimdFP32Ty);
- for (uint32_t i = 0; i < mVWidth; ++i)
- {
- Value* pSrc = VEXTRACT(a, C(i));
- Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
- pResult = VINSERT(pResult, pConv, C(i));
- }
-
- pResult->setName(name);
- return pResult;
- }
// Added path: a single extension of the bitcast input to an mFP32Ty vector of
// numElems elements. NOTE(review): FP_EXT presumably wraps the LLVM fpext
// instruction, leaving instruction selection to the backend - confirm.
+ return FP_EXT(input, VectorType::get(mFP32Ty, numElems), name);
}
//////////////////////////////////////////////////////////////////////////
{"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
{"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
{"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
{"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
{"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
},
{"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
{"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
{"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
{"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
{"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
},
#else
{"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
#endif
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512}, NO_EMU}},
{"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
{"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
}};