swr/rast: fix VCVTPD2PS generation for AVX512
author Alok Hota <alok.hota@intel.com>
Wed, 16 May 2018 16:14:19 +0000 (11:14 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Thu, 17 May 2018 15:53:06 +0000 (10:53 -0500)
Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp

index 3caea67e1690ad7f363f676c99a19b249bdf38b0..e0296f6255f3bb324f005da155d57f0a008fb2b0 100644 (file)
@@ -265,8 +265,16 @@ namespace SwrJit
                 // Assuming the intrinsics are consistent and place the src operand and mask last in the argument list.
                 if (mTarget == AVX512)
                 {
-                    args.push_back(GetZeroVec(vecWidth, pElemTy));
-                    args.push_back(GetMask(vecWidth));
+                    if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS")) {
+                        args.push_back(GetZeroVec(W256, pCallInst->getType()->getScalarType()));
+                        args.push_back(GetMask(W256));
+                        // for AVX512 VCVTPD2PS, we also have to add rounding mode
+                        args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT |
+                                            _MM_FROUND_NO_EXC));
+                    } else {
+                        args.push_back(GetZeroVec(vecWidth, pElemTy));
+                        args.push_back(GetMask(vecWidth));
+                    }
                 }
 
                 return B->CALLA(pIntrin, args);