i386: Prevent spurious FP exceptions with _mm_cvt{,t}ps_pi32 [PR98522]
author Uros Bizjak <ubizjak@gmail.com>
Tue, 5 Jan 2021 13:42:29 +0000 (14:42 +0100)
committer Uros Bizjak <ubizjak@gmail.com>
Tue, 5 Jan 2021 13:45:28 +0000 (14:45 +0100)
Prevent spurious FP exceptions with _mm_cvt{,t}ps_pi32 for TARGET_MMX_WITH_SSE
by clearing the top 64 bits of the input XMM register.

2021-01-05  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
PR target/98522
* config/i386/sse.md (sse_cvtps2pi): Redefine as define_insn_and_split.
Clear the top 64 bits of the input XMM register.
(sse_cvttps2pi): Ditto.

gcc/testsuite

PR target/98522
* gcc.target/i386/pr98522.c: New test.

gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/pr98522.c [new file with mode: 0644]

index 582bc07a00aa22af308022a16e14c33d79b146ee..4fd7358dc183c11a8f4447aed3e271655362bb81 100644 (file)
    (set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])
 
-(define_insn "sse_cvtps2pi"
+;; Convert the two low SF elements of operand 1 to SI using
+;; UNSPEC_FIX_NOTRUNC.  Alternative 0 (MMX destination) is emitted
+;; directly as cvtps2pi.  Alternative 1 (SSE destination) is output as
+;; "#" and split after reload into a move that zeroes the upper two
+;; elements followed by a full V4SF conversion, so that the unused
+;; upper input elements cannot raise spurious FP exceptions.
+(define_insn_and_split "sse_cvtps2pi"
   [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
        (vec_select:V2SI
-         (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
+         (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
   "@
    cvtps2pi\t{%1, %0|%0, %q1}
-   %vcvtps2dq\t{%1, %0|%0, %1}"
+   #"
+  "TARGET_SSE2 && reload_completed
+   && SSE_REG_P (operands[0])"
+  [(const_int 0)]
+{
+  /* Low half of the input, viewed as V2SF.  */
+  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
+                           GET_MODE (operands[1]));
+  /* The destination register, viewed as V4SF, serves as the temporary.  */
+  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
+                           GET_MODE (operands[0]));
+
+  /* tmp = { op1, 0 }: keep the low two elements, zero the upper two.  */
+  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
+  emit_insn (gen_rtx_SET (tmp, op1));
+
+  /* Convert all four elements of tmp into the destination viewed as
+     V4SI; the V2SI result of the insn is its low half.  */
+  rtx dest = lowpart_subreg (V4SImode, operands[0],
+                           GET_MODE (operands[0]));
+  emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
+  DONE;
+}
   [(set_attr "isa" "*,sse2")
    (set_attr "mmx_isa" "native,*")
    (set_attr "type" "ssecvt")
    (set_attr "unit" "mmx,*")
    (set_attr "mode" "DI")])
 
-(define_insn "sse_cvttps2pi"
+(define_insn_and_split "sse_cvttps2pi"
   [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
        (vec_select:V2SI
-         (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
+         (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
          (parallel [(const_int 0) (const_int 1)])))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
   "@
    cvttps2pi\t{%1, %0|%0, %q1}
-   %vcvttps2dq\t{%1, %0|%0, %1}"
+   #"
+  "TARGET_SSE2 && reload_completed
+   && SSE_REG_P (operands[0])"
+  [(const_int 0)]
+{
+  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
+                           GET_MODE (operands[1]));
+  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
+                           GET_MODE (operands[0]));
+
+  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
+  emit_insn (gen_rtx_SET (tmp, op1));
+
+  rtx dest = lowpart_subreg (V4SImode, operands[0],
+                           GET_MODE (operands[0]));
+  emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
+  DONE;
+}
   [(set_attr "isa" "*,sse2")
    (set_attr "mmx_isa" "native,*")
    (set_attr "type" "ssecvt")
 (define_insn "*vec_concatv4sf_0"
   [(set (match_operand:V4SF 0 "register_operand"       "=v")
        (vec_concat:V4SF
-         (match_operand:V2SF 1 "nonimmediate_operand" "xm")
+         (match_operand:V2SF 1 "nonimmediate_operand" "vm")
          (match_operand:V2SF 2 "const0_operand"       " C")))]
   "TARGET_SSE2"
   "%vmovq\t{%1, %0|%0, %1}"
   [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
        (vec_merge:VF2_512_256
          (vec_duplicate:VF2_512_256
-           (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
+           (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
          (match_operand:VF2_512_256 1 "const0_operand" "C")
          (const_int 1)))]
   "TARGET_AVX"
diff --git a/gcc/testsuite/gcc.target/i386/pr98522.c b/gcc/testsuite/gcc.target/i386/pr98522.c
new file mode 100644 (file)
index 0000000..762f2ed
--- /dev/null
@@ -0,0 +1,47 @@
+/* PR target/98522 */
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+/* Check that _mm_cvt_ps2pi and _mm_cvtt_ps2pi do not raise FE_INVALID
+   when only the upper two elements of the source vector hold values
+   that cannot be converted.  */
+
+#include <emmintrin.h>
+#include <fenv.h>
+
+/* Out-of-line wrapper around _mm_cvt_ps2pi.  */
+__m64
+__attribute__((noinline))
+test_cvt (__m128 a)
+{
+  return _mm_cvt_ps2pi (a);
+}
+
+/* Out-of-line wrapper around _mm_cvtt_ps2pi.  */
+__m64
+__attribute__((noinline))
+test_cvtt (__m128 a)
+{
+  return _mm_cvtt_ps2pi (a);
+}
+
+int
+main ()
+{
+  /* Low 64 bits are zero (two 0.0f elements); the high 64 bits encode
+     two NaNs, which must not take part in the conversion.  */
+  __m128 x = (__m128)(__m128i){0x0000000000000000LL, 0x7fffffffffffffffLL};
+  volatile __m64 y;
+
+  feclearexcept (FE_INVALID);
+
+  y = test_cvt (x);
+  y = test_cvtt (x);
+
+  if (fetestexcept (FE_INVALID))
+    __builtin_abort ();
+
+  return 0;
+}
+