swr: [rasterizer] add support for llvm-3.9
author: Tim Rowley <timothy.o.rowley@intel.com>
Tue, 14 Jun 2016 23:54:34 +0000 (17:54 -0600)
committer: Tim Rowley <timothy.o.rowley@intel.com>
Tue, 12 Jul 2016 16:09:49 +0000 (11:09 -0500)
v2: use signed compare, remove unneeded vmask

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
src/gallium/drivers/swr/rasterizer/jitter/scripts/gen_llvm_ir_macros.py

index 671178f719fffc6c6428c52dd922e269d76104a7..da77f600a71f7b96bd09b2a62e6fd538e24b745b 100644 (file)
@@ -700,20 +700,22 @@ Value *Builder::PSHUFB(Value* a, Value* b)
 /// lower 8 values are used.
 Value *Builder::PMOVSXBD(Value* a)
 {
-    Value* res;
+    // llvm-3.9 removed the pmovsxbd intrinsic
+#if HAVE_LLVM < 0x309
     // use avx2 byte sign extend instruction if available
     if(JM()->mArch.AVX2())
     {
-        res = VPMOVSXBD(a);
+        Function *pmovsxbd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxbd);
+        return CALL(pmovsxbd, std::initializer_list<Value*>{a});
     }
     else
+#endif
     {
         // VPMOVSXBD output type
         Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
         // Extract 8 values from 128bit lane and sign extend
-        res = S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
+        return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
     }
-    return res;
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -722,20 +724,22 @@ Value *Builder::PMOVSXBD(Value* a)
 /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
 Value *Builder::PMOVSXWD(Value* a)
 {
-    Value* res;
+    // llvm-3.9 removed the pmovsxwd intrinsic
+#if HAVE_LLVM < 0x309
     // use avx2 word sign extend if available
     if(JM()->mArch.AVX2())
     {
-        res = VPMOVSXWD(a);
+        Function *pmovsxwd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxwd);
+        return CALL(pmovsxwd, std::initializer_list<Value*>{a});
     }
     else
+#endif
     {
         // VPMOVSXWD output type
         Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
         // Extract 8 values from 128bit lane and sign extend
-        res = S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
+        return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
     }
-    return res;
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -875,9 +879,15 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding)
 
 Value *Builder::PMAXSD(Value* a, Value* b)
 {
+    // llvm-3.9 removed the pmax intrinsics
+#if HAVE_LLVM >= 0x309
+    Value* cmp = ICMP_SGT(a, b);
+    return SELECT(cmp, a, b);
+#else
     if (JM()->mArch.AVX2())
     {
-        return VPMAXSD(a, b);
+        Function* pmaxsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmaxs_d);
+        return CALL(pmaxsd, {a, b});
     }
     else
     {
@@ -900,13 +910,20 @@ Value *Builder::PMAXSD(Value* a, Value* b)
 
         return result;
     }
+#endif
 }
 
 Value *Builder::PMINSD(Value* a, Value* b)
 {
+    // llvm-3.9 removed the pmin intrinsics
+#if HAVE_LLVM >= 0x309
+    Value* cmp = ICMP_SLT(a, b);
+    return SELECT(cmp, a, b);
+#else
     if (JM()->mArch.AVX2())
     {
-        return VPMINSD(a, b);
+        Function* pminsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmins_d);
+        return CALL(pminsd, {a, b});
     }
     else
     {
@@ -929,6 +946,7 @@ Value *Builder::PMINSD(Value* a, Value* b)
 
         return result;
     }
+#endif
 }
 
 void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets, 
index 4963c5ef593a8cabf890c6817aa41c09db25af9f..234889b6610305f2c9b2c5a9b2e84bb554093c18 100644 (file)
@@ -91,8 +91,6 @@ intrinsics = [
         ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
         ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
         ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
-        ["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
-        ["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
         ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
         ["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]],
         ["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]],
@@ -100,8 +98,6 @@ intrinsics = [
         ["VMASKLOADD", "x86_avx2_maskload_d_256", ["src", "mask"]],
         ["VMASKMOVPS", "x86_avx_maskload_ps_256", ["src", "mask"]],
         ["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
-        ["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]],  # sign extend packed 8bit components
-        ["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]],  # sign extend packed 16bit components
         ["VPERMD", "x86_avx2_permd", ["a", "idx"]],
         ["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
         ["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
@@ -110,7 +106,6 @@ intrinsics = [
         ["VPTESTC", "x86_avx_ptestc_256", ["a", "b"]],
         ["VPTESTZ", "x86_avx_ptestz_256", ["a", "b"]],
         ["VFMADDPS", "x86_fma_vfmadd_ps_256", ["a", "b", "c"]],
-        ["VCVTTPS2DQ", "x86_avx_cvtt_ps2dq_256", ["a"]],
         ["VMOVMSKPS", "x86_avx_movmsk_ps_256", ["a"]],
         ["INTERRUPT", "x86_int", ["a"]],
     ]