// Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the
// previous behavior of mapping directly to avx/avx2 intrinsics.
- static std::map<std::string, IntrinsicID> intrinsicMap = {
- {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
- {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
- {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
- {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
- {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
- {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
- {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
- {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
- };
+ // Table of legacy meta intrinsics: maps a "meta.intrinsic.*" function name directly to the
+ // x86 intrinsic ID that replaces it. The table is a function-local static, so it is
+ // constructed on the first call and the same instance is returned on every later call.
+ // It is returned by non-const reference because callers look names up with operator[].
+ // NOTE(review): presumably wrapped in a getter to control when the map is constructed --
+ // confirm against the commit description.
+ using intrinsicMap_t = std::map<std::string, IntrinsicID>;
+ static intrinsicMap_t& getIntrinsicMap() {
+ // Use the alias declared above so the static's type cannot drift from the return type.
+ static intrinsicMap_t intrinsicMap = {
+ {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+ {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+ {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+ {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+ {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+ {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+ {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+ {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc}
+ };
+ return intrinsicMap;
+ }
// Forward decls
// NO_EMU is referenced by the advanced intrinsic table as the handler for entries that name a
// real x86 intrinsic; its definition is not in this part of the file.
Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
// Sentinel intrinsic ID (-1 cast to Intrinsic::ID), used in the 512-wide slot of table entries.
// NOTE(review): presumably means "issue the 256-wide intrinsic twice" -- confirm where it is consumed.
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
- // clang-format off
- static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
- // 256 wide 512 wide
- {
- // AVX
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX2
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- {
- // AVX512
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
-#if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
-#else
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
-#endif
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
-#if LLVM_VERSION_MAJOR < 7
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
-#else
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
-#endif
- {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
- }};
- // clang-format on
+ using intrinsicMapAdvanced_t = std::vector<std::map<std::string, X86Intrinsic>>; // one name -> X86Intrinsic map per target architecture
+
+ static intrinsicMapAdvanced_t& getIntrinsicMapAdvanced() // returns the function-local static table below; it is built on the first call
+ {
+ // clang-format off
+ static intrinsicMapAdvanced_t intrinsicMapAdvanced = { // callers index the outer vector with mTarget, then look up the meta intrinsic name
+ // Entry layout: name -> {{256-wide intrinsic, 512-wide intrinsic}, handler (NO_EMU or a *_EMU function)}
+ {
+ // AVX: first map in the vector
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX2: second map in the vector; differs from AVX in the VPERMPS, VPERMD and VCVTPD2PS entries
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX512: third map in the vector; VPERMPS, VPERMD and VCVTPD2PS use masked intrinsics when LLVM_VERSION_MAJOR < 7 and emulation otherwise
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+ #if LLVM_VERSION_MAJOR < 7
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
+ #else
+ {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ #endif
+ {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+ #if LLVM_VERSION_MAJOR < 7
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
+ #else
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VCONVERT_EMU}},
+ #endif
+ {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}}
+ }};
+ // clang-format on
+ return intrinsicMapAdvanced;
+ }
static uint32_t getBitWidth(VectorType *pVTy)
{
SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
}
- return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
+ return ConstantVector::getNullValue(getVectorType(pTy, numElem));
}
Value* GetMask(TargetWidth width)
#else
uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
#endif
- return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
+ return B->S_EXT(vi1Mask, getVectorType(B->mInt32Ty, numElem));
}
Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
Function* pFunc = pCallInst->getCalledFunction();
assert(pFunc);
- auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName().str()];
+ auto& intrinsic = getIntrinsicMapAdvanced()[mTarget][pFunc->getName().str()];
TargetWidth vecWidth;
Type* pElemTy;
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
assert(pFunc);
// Forward to the advanced support if found
- if (intrinsicMap2[mTarget].find(pFunc->getName().str()) != intrinsicMap2[mTarget].end())
+ if (getIntrinsicMapAdvanced()[mTarget].find(pFunc->getName().str()) != getIntrinsicMapAdvanced()[mTarget].end())
{
return ProcessIntrinsicAdvanced(pCallInst);
}
- SWR_ASSERT(intrinsicMap.find(pFunc->getName().str()) != intrinsicMap.end(),
+ SWR_ASSERT(getIntrinsicMap().find(pFunc->getName().str()) != getIntrinsicMap().end(),
"Unimplemented intrinsic %s.",
pFunc->getName().str().c_str());
- Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName().str()];
+ Intrinsic::ID x86Intrinsic = getIntrinsicMap()[pFunc->getName().str()];
Function* pX86IntrinFunc =
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
v32Gather = UndefValue::get(vSrc->getType());
#if LLVM_VERSION_MAJOR > 10
- auto vi32Scale = ConstantVector::getSplat(ElementCount(numElem, false), cast<ConstantInt>(i32Scale));
+ auto vi32Scale = ConstantVector::getSplat(ElementCount::get(numElem, false), cast<ConstantInt>(i32Scale));
#else
auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
#endif
#else
uint32_t numElem = v64Mask->getType()->getVectorNumElements();
#endif
- v64Mask = B->S_EXT(v64Mask, VectorType::get(B->mInt64Ty, numElem));
+ v64Mask = B->S_EXT(v64Mask, getVectorType(B->mInt64Ty, numElem));
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
uint32_t numElemSrc1 = src1->getType()->getVectorNumElements();
uint32_t numElemMask1 = mask1->getType()->getVectorNumElements();
#endif
- src0 = B->BITCAST(src0, VectorType::get(B->mInt64Ty, numElemSrc0));
- mask0 = B->BITCAST(mask0, VectorType::get(B->mInt64Ty, numElemMask0));
+ src0 = B->BITCAST(src0, getVectorType(B->mInt64Ty, numElemSrc0));
+ mask0 = B->BITCAST(mask0, getVectorType(B->mInt64Ty, numElemMask0));
Value* gather0 =
B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
- src1 = B->BITCAST(src1, VectorType::get(B->mInt64Ty, numElemSrc1));
- mask1 = B->BITCAST(mask1, VectorType::get(B->mInt64Ty, numElemMask1));
+ src1 = B->BITCAST(src1, getVectorType(B->mInt64Ty, numElemSrc1));
+ mask1 = B->BITCAST(mask1, getVectorType(B->mInt64Ty, numElemMask1));
Value* gather1 =
B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));