Fix dg.torture tests with avx512
authorIlya Tocar <ilya.tocar@intel.com>
Wed, 5 Nov 2014 15:55:44 +0000 (15:55 +0000)
committerIlya Tocar <tocarip@gcc.gnu.org>
Wed, 5 Nov 2014 15:55:44 +0000 (18:55 +0300)
gcc/
* config/i386/i386.c (expand_vec_perm_pshufb): Try vpermq/vpermd
for 512-bit wide modes.
(expand_vec_perm_1): Use correct versions of patterns.
* config/i386/sse.md (avx512f_vec_dup_<mode>_1): New.
(vashr<mode>3<mask_name>): Split V8HImode and V16QImode.

From-SVN: r217138

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index e6351f49a7681cc3c341477159127c6bc8c68b08..2fe8c8af1e54a5c936e973ac06a717df8c3ae2b4 100644 (file)
@@ -1,3 +1,11 @@
+2014-11-05  Ilya Tocar  <ilya.tocar@intel.com>
+
+       * config/i386/i386.c (expand_vec_perm_pshufb): Try vpermq/vpermd
+       for 512-bit wide modes.
+       (expand_vec_perm_1): Use correct versions of patterns.
+       * config/i386/sse.md (avx512f_vec_dup<mode>_1): New.
+       (vashr<mode>3<mask_name>): Split V8HImode and V16QImode.
+
 2014-11-05  Ilya Enkovich  <ilya.enkovich@intel.com>
 
        * ipa-chkp.c: New.
index 8f03aa25e1e794dfc1a2022ac9c1cd556ac862ba..0a4c13bed4421fcf5e3fd74329cb9e658125916b 100644 (file)
@@ -46891,6 +46891,42 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
        {
          if (!TARGET_AVX512BW)
            return false;
+
+         /* If vpermq didn't work, vpshufb won't work either.  */
+         if (d->vmode == V8DFmode || d->vmode == V8DImode)
+           return false;
+
+         vmode = V64QImode;
+         if (d->vmode == V16SImode
+             || d->vmode == V32HImode
+             || d->vmode == V64QImode)
+           {
+             /* First see if vpermq can be used for
+                V16SImode/V32HImode/V64QImode.  */
+             if (valid_perm_using_mode_p (V8DImode, d))
+               {
+                 for (i = 0; i < 8; i++)
+                   perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
+                 if (d->testing_p)
+                   return true;
+                 target = gen_reg_rtx (V8DImode);
+                 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
+                                     perm, 8, false))
+                   {
+                     emit_move_insn (d->target,
+                                     gen_lowpart (d->vmode, target));
+                     return true;
+                   }
+                 return false;
+               }
+
+             /* Next see if vpermd can be used.  */
+             if (valid_perm_using_mode_p (V16SImode, d))
+               vmode = V16SImode;
+           }
+         /* Or if vpermps can be used.  */
+         else if (d->vmode == V16SFmode)
+           vmode = V16SImode;
          if (vmode == V64QImode)
            {
              /* vpshufb only works intra lanes, it is not
@@ -46910,6 +46946,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
   if (vmode == V8SImode)
     for (i = 0; i < 8; ++i)
       rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
+  else if (vmode == V16SImode)
+    for (i = 0; i < 16; ++i)
+      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
   else
     {
       eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
@@ -46948,8 +46987,14 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
        emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
       else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
-      else
+      else if (vmode == V8SImode)
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
+      else if (vmode == V16SFmode)
+       emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
+      else if (vmode == V16SImode)
+       emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
+      else
+       gcc_unreachable ();
     }
   else
     {
@@ -47003,21 +47048,21 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
            {
            case V64QImode:
              if (TARGET_AVX512BW)
-               gen = gen_avx512bw_vec_dupv64qi;
+               gen = gen_avx512bw_vec_dupv64qi_1;
              break;
            case V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case V32HImode:
              if (TARGET_AVX512BW)
-               gen = gen_avx512bw_vec_dupv32hi;
+               gen = gen_avx512bw_vec_dupv32hi_1;
              break;
            case V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case V16SImode:
              if (TARGET_AVX512F)
-               gen = gen_avx512f_vec_dupv16si;
+               gen = gen_avx512f_vec_dupv16si_1;
              break;
            case V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
@@ -47030,18 +47075,18 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
              break;
            case V16SFmode:
              if (TARGET_AVX512F)
-               gen = gen_avx512f_vec_dupv16sf;
+               gen = gen_avx512f_vec_dupv16sf_1;
              break;
            case V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            case V8DFmode:
              if (TARGET_AVX512F)
-               gen = gen_avx512f_vec_dupv8df;
+               gen = gen_avx512f_vec_dupv8df_1;
              break;
            case V8DImode:
              if (TARGET_AVX512F)
-               gen = gen_avx512f_vec_dupv8di;
+               gen = gen_avx512f_vec_dupv8di_1;
              break;
            /* For other modes prefer other shuffles this function creates.  */
            default: break;
index 2757a1e781c4fda61ceff31bb79a521890785864..13ddd290bdb4140d6ada95502c14ac64d66b8b5e 100644 (file)
     (set_attr "prefix" "vex")
     (set_attr "mode" "V8SF")])
 
+(define_insn "avx512f_vec_dup<mode>_1"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+       (vec_duplicate:VF_512
+         (vec_select:<ssescalarmode>
+           (match_operand:VF_512 1 "register_operand" "v")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F"
+  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
+  [(set_attr "type" "sselog1")
+    (set_attr "prefix" "evex")
+    (set_attr "mode" "<MODE>")])
+
 (define_insn "vec_dupv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SF
          (match_operand:VI48_256 2 "nonimmediate_operand")))]
   "TARGET_AVX2")
 
-(define_expand "vashr<mode>3<mask_name>"
-  [(set (match_operand:VI12_128 0 "register_operand")
-       (ashiftrt:VI12_128
-         (match_operand:VI12_128 1 "register_operand")
-         (match_operand:VI12_128 2 "nonimmediate_operand")))]
+(define_expand "vashrv8hi3<mask_name>"
+  [(set (match_operand:V8HI 0 "register_operand")
+       (ashiftrt:V8HI
+         (match_operand:V8HI 1 "register_operand")
+         (match_operand:V8HI 2 "nonimmediate_operand")))]
   "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
 {
   if (TARGET_XOP)
     {
-      rtx neg = gen_reg_rtx (<MODE>mode);
-      emit_insn (gen_neg<mode>2 (neg, operands[2]));
-      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
+      rtx neg = gen_reg_rtx (V8HImode);
+      emit_insn (gen_negv8hi2 (neg, operands[2]));
+      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
       DONE;
     }
 })
 
+(define_expand "vashrv16qi3"
+  [(set (match_operand:V16QI 0 "register_operand")
+       (ashiftrt:V16QI
+         (match_operand:V16QI 1 "register_operand")
+         (match_operand:V16QI 2 "nonimmediate_operand")))]
+  "TARGET_XOP"
+{
+   rtx neg = gen_reg_rtx (V16QImode);
+   emit_insn (gen_negv16qi2 (neg, operands[2]));
+   emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
+   DONE;
+})
+
 (define_expand "vashrv2di3<mask_name>"
   [(set (match_operand:V2DI 0 "register_operand")
        (ashiftrt:V2DI
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
+(define_insn "<avx512>_vec_dup<mode>_1"
+  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
+       (vec_duplicate:VI_AVX512BW
+         (vec_select:VI_AVX512BW
+           (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
+           (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F"
+  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
+   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<avx512>_vec_dup<mode><mask_name>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
        (vec_duplicate:V48_AVX512VL