swr: Fix crashes on non-AVX hardware
[mesa.git] / src / gallium / drivers / swr / rasterizer / jitter / functionpasses / lower_x86.cpp
index c583c25f3bbbc8fa7e51ec78b8b0db0ccc8ec46a..216121bc51f454f53b314761ae6c94bcdb03e662 100644 (file)
@@ -74,16 +74,20 @@ namespace SwrJit
 
     // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the
     // previous behavior of mapping directly to avx/avx2 intrinsics.
-    static std::map<std::string, IntrinsicID> intrinsicMap = {
-        {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
-        {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
-        {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
-        {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
-        {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
-        {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
-        {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
-        {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
-    };
+    using intrinsicMap_t = std::map<std::string, IntrinsicID>;
+    static intrinsicMap_t& getIntrinsicMap() {
+        static std::map<std::string, IntrinsicID> intrinsicMap = {
+            {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+            {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+            {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+            {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+            {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+            {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+            {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+            {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc}
+        };
+        return intrinsicMap;
+    }
 
     // Forward decls
     Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
@@ -108,58 +112,64 @@ namespace SwrJit
 
     static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
 
-    // clang-format off
-    static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
-        //                               256 wide                               512 wide
-        {
-            // AVX
-            {"meta.intrinsic.VRCPPS",    {{Intrinsic::x86_avx_rcp_ps_256,       DOUBLE},                    NO_EMU}},
-            {"meta.intrinsic.VPERMPS",   {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VPERM_EMU}},
-            {"meta.intrinsic.VPERMD",    {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VPERM_EMU}},
-            {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
-            {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256,   Intrinsic::not_intrinsic},  NO_EMU}},
-            {"meta.intrinsic.VROUND",    {{Intrinsic::x86_avx_round_ps_256,     DOUBLE},                    NO_EMU}},
-            {"meta.intrinsic.VHSUBPS",   {{Intrinsic::x86_avx_hsub_ps_256,      DOUBLE},                    NO_EMU}},
-        },
-        {
-            // AVX2
-            {"meta.intrinsic.VRCPPS",       {{Intrinsic::x86_avx_rcp_ps_256,    DOUBLE},                    NO_EMU}},
-            {"meta.intrinsic.VPERMPS",      {{Intrinsic::x86_avx2_permps,       Intrinsic::not_intrinsic},  VPERM_EMU}},
-            {"meta.intrinsic.VPERMD",       {{Intrinsic::x86_avx2_permd,        Intrinsic::not_intrinsic},  VPERM_EMU}},
-            {"meta.intrinsic.VGATHERPD",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERPS",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERDD",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
-            {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
-            {"meta.intrinsic.VCVTPD2PS",    {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE},                   NO_EMU}},
-            {"meta.intrinsic.VROUND",       {{Intrinsic::x86_avx_round_ps_256,  DOUBLE},                    NO_EMU}},
-            {"meta.intrinsic.VHSUBPS",      {{Intrinsic::x86_avx_hsub_ps_256,   DOUBLE},                    NO_EMU}},
-        },
-        {
-            // AVX512
-            {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256,     Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
-#if LLVM_VERSION_MAJOR < 7
-            {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
-            {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
-#else
-            {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic,              Intrinsic::not_intrinsic}, VPERM_EMU}},
-            {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic,               Intrinsic::not_intrinsic}, VPERM_EMU}},
-#endif
-            {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
-            {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
-            {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
-#if LLVM_VERSION_MAJOR < 7
-            {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
-#else
-            {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VCONVERT_EMU}},
-#endif
-            {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic,               Intrinsic::not_intrinsic}, VROUND_EMU}},
-            {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic,              Intrinsic::not_intrinsic}, VHSUB_EMU}},
-        }};
-    // clang-format on
+    using intrinsicMapAdvanced_t = std::vector<std::map<std::string, X86Intrinsic>>;
+
+    static intrinsicMapAdvanced_t&  getIntrinsicMapAdvanced()
+    {
+        // clang-format off
+        static intrinsicMapAdvanced_t intrinsicMapAdvanced = {
+            //                               256 wide                               512 wide
+            {
+                // AVX
+                {"meta.intrinsic.VRCPPS",    {{Intrinsic::x86_avx_rcp_ps_256,       DOUBLE},                    NO_EMU}},
+                {"meta.intrinsic.VPERMPS",   {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VPERM_EMU}},
+                {"meta.intrinsic.VPERMD",    {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VPERM_EMU}},
+                {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+                {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256,   Intrinsic::not_intrinsic},  NO_EMU}},
+                {"meta.intrinsic.VROUND",    {{Intrinsic::x86_avx_round_ps_256,     DOUBLE},                    NO_EMU}},
+                {"meta.intrinsic.VHSUBPS",   {{Intrinsic::x86_avx_hsub_ps_256,      DOUBLE},                    NO_EMU}},
+            },
+            {
+                // AVX2
+                {"meta.intrinsic.VRCPPS",       {{Intrinsic::x86_avx_rcp_ps_256,    DOUBLE},                    NO_EMU}},
+                {"meta.intrinsic.VPERMPS",      {{Intrinsic::x86_avx2_permps,       Intrinsic::not_intrinsic},  VPERM_EMU}},
+                {"meta.intrinsic.VPERMD",       {{Intrinsic::x86_avx2_permd,        Intrinsic::not_intrinsic},  VPERM_EMU}},
+                {"meta.intrinsic.VGATHERPD",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERPS",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERDD",    {{Intrinsic::not_intrinsic,         Intrinsic::not_intrinsic},  VGATHER_EMU}},
+                {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+                {"meta.intrinsic.VCVTPD2PS",    {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE},                   NO_EMU}},
+                {"meta.intrinsic.VROUND",       {{Intrinsic::x86_avx_round_ps_256,  DOUBLE},                    NO_EMU}},
+                {"meta.intrinsic.VHSUBPS",      {{Intrinsic::x86_avx_hsub_ps_256,   DOUBLE},                    NO_EMU}},
+            },
+            {
+                // AVX512
+                {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256,     Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+    #if LLVM_VERSION_MAJOR < 7
+                {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
+                {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
+    #else
+                {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic,              Intrinsic::not_intrinsic}, VPERM_EMU}},
+                {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic,               Intrinsic::not_intrinsic}, VPERM_EMU}},
+    #endif
+                {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
+                {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VGATHER_EMU}},
+                {"meta.intrinsic.VSCATTERPS", {{Intrinsic::not_intrinsic,           Intrinsic::not_intrinsic}, VSCATTER_EMU}},
+    #if LLVM_VERSION_MAJOR < 7
+                {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512}, NO_EMU}},
+    #else
+                {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::not_intrinsic,            Intrinsic::not_intrinsic}, VCONVERT_EMU}},
+    #endif
+                {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic,               Intrinsic::not_intrinsic}, VROUND_EMU}},
+                {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic,              Intrinsic::not_intrinsic}, VHSUB_EMU}}
+            }};
+        // clang-format on
+        return intrinsicMapAdvanced;
+    }
 
     static uint32_t getBitWidth(VectorType *pVTy)
     {
@@ -287,7 +297,7 @@ namespace SwrJit
                 SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
             }
 
-            return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
+            return ConstantVector::getNullValue(getVectorType(pTy, numElem));
         }
 
         Value* GetMask(TargetWidth width)
@@ -315,7 +325,7 @@ namespace SwrJit
 #else
             uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
 #endif
-            return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
+            return B->S_EXT(vi1Mask, getVectorType(B->mInt32Ty, numElem));
         }
 
         Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
@@ -323,7 +333,7 @@ namespace SwrJit
             Function*   pFunc = pCallInst->getCalledFunction();
             assert(pFunc);
 
-            auto&       intrinsic = intrinsicMap2[mTarget][pFunc->getName().str()];
+            auto&       intrinsic = getIntrinsicMapAdvanced()[mTarget][pFunc->getName().str()];
             TargetWidth vecWidth;
             Type*       pElemTy;
             GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
@@ -385,16 +395,16 @@ namespace SwrJit
             assert(pFunc);
 
             // Forward to the advanced support if found
-            if (intrinsicMap2[mTarget].find(pFunc->getName().str()) != intrinsicMap2[mTarget].end())
+            if (getIntrinsicMapAdvanced()[mTarget].find(pFunc->getName().str()) != getIntrinsicMapAdvanced()[mTarget].end())
             {
                 return ProcessIntrinsicAdvanced(pCallInst);
             }
 
-            SWR_ASSERT(intrinsicMap.find(pFunc->getName().str()) != intrinsicMap.end(),
+            SWR_ASSERT(getIntrinsicMap().find(pFunc->getName().str()) != getIntrinsicMap().end(),
                        "Unimplemented intrinsic %s.",
                        pFunc->getName().str().c_str());
 
-            Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName().str()];
+            Intrinsic::ID x86Intrinsic = getIntrinsicMap()[pFunc->getName().str()];
             Function*     pX86IntrinFunc =
                 Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
 
@@ -546,7 +556,7 @@ namespace SwrJit
 
             v32Gather        = UndefValue::get(vSrc->getType());
 #if LLVM_VERSION_MAJOR > 10
-            auto vi32Scale   = ConstantVector::getSplat(ElementCount(numElem, false), cast<ConstantInt>(i32Scale));
+            auto vi32Scale   = ConstantVector::getSplat(ElementCount::get(numElem, false), cast<ConstantInt>(i32Scale));
 #else
             auto vi32Scale   = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
 #endif
@@ -609,7 +619,7 @@ namespace SwrJit
 #else
                     uint32_t numElem = v64Mask->getType()->getVectorNumElements();
 #endif
-                    v64Mask = B->S_EXT(v64Mask, VectorType::get(B->mInt64Ty, numElem));
+                    v64Mask = B->S_EXT(v64Mask, getVectorType(B->mInt64Ty, numElem));
                     v64Mask = B->BITCAST(v64Mask, vSrc->getType());
 
                     Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
@@ -632,12 +642,12 @@ namespace SwrJit
                     uint32_t numElemSrc1  = src1->getType()->getVectorNumElements();
                     uint32_t numElemMask1 = mask1->getType()->getVectorNumElements();
 #endif
-                    src0 = B->BITCAST(src0, VectorType::get(B->mInt64Ty, numElemSrc0));
-                    mask0 = B->BITCAST(mask0, VectorType::get(B->mInt64Ty, numElemMask0));
+                    src0 = B->BITCAST(src0, getVectorType(B->mInt64Ty, numElemSrc0));
+                    mask0 = B->BITCAST(mask0, getVectorType(B->mInt64Ty, numElemMask0));
                     Value* gather0 =
                         B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
-                    src1 = B->BITCAST(src1, VectorType::get(B->mInt64Ty, numElemSrc1));
-                    mask1 = B->BITCAST(mask1, VectorType::get(B->mInt64Ty, numElemMask1));
+                    src1 = B->BITCAST(src1, getVectorType(B->mInt64Ty, numElemSrc1));
+                    mask1 = B->BITCAST(mask1, getVectorType(B->mInt64Ty, numElemMask1));
                     Value* gather1 =
                         B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
                     v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));