sse.md (vec_interleave_high<mode>, [...]): Add AVX2 expanders for VI_256 modes.
authorJakub Jelinek <jakub@redhat.com>
Fri, 14 Oct 2011 16:55:25 +0000 (18:55 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 14 Oct 2011 16:55:25 +0000 (18:55 +0200)
* config/i386/sse.md (vec_interleave_high<mode>,
vec_interleave_low<mode>): Add AVX2 expanders for VI_256
modes.
* config/i386/i386.c (expand_vec_perm_interleave3): New function.
(ix86_expand_vec_perm_builtin_1): Call it.

From-SVN: r179995

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index 48b703c09e7685b189455e3bd6f042cbd8c4b33d..d43a5cf3e5a8f9c74998062023a7636091b0d768 100644 (file)
@@ -1,3 +1,11 @@
+2011-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+       * config/i386/sse.md (vec_interleave_high<mode>,
+       vec_interleave_low<mode>): Add AVX2 expanders for VI_256
+       modes.
+       * config/i386/i386.c (expand_vec_perm_interleave3): New function.
+       (ix86_expand_vec_perm_builtin_1): Call it.
+
 2011-10-14  Georg-Johann Lay  <avr@gjlay.de>
 
        Fix thinko from r179765
index df6267b664c93c131ba27c6959be0e635af7fa71..f09a37277faa133f2a2fbbc5b4dfb0bdfdd124bd 100644 (file)
@@ -35474,6 +35474,82 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
+   a two vector permutation using 2 intra-lane interleave insns
+   and cross-lane shuffle for 32-byte vectors.  */
+
+static bool
+expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
+{
+  unsigned i, nelt;
+  rtx (*gen) (rtx, rtx, rtx);
+
+  if (d->op0 == d->op1)
+    return false;
+  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
+    ;
+  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
+    ;
+  else
+    return false;
+
+  nelt = d->nelt;
+  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
+    return false;
+  for (i = 0; i < nelt; i += 2)
+    if (d->perm[i] != d->perm[0] + i / 2
+       || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
+      return false;
+
+  if (d->testing_p)
+    return true;
+
+  switch (d->vmode)
+    {
+    case V32QImode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv32qi;
+      else
+       gen = gen_vec_interleave_lowv32qi;
+      break;
+    case V16HImode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv16hi;
+      else
+       gen = gen_vec_interleave_lowv16hi;
+      break;
+    case V8SImode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv8si;
+      else
+       gen = gen_vec_interleave_lowv8si;
+      break;
+    case V4DImode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv4di;
+      else
+       gen = gen_vec_interleave_lowv4di;
+      break;
+    case V8SFmode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv8sf;
+      else
+       gen = gen_vec_interleave_lowv8sf;
+      break;
+    case V4DFmode:
+      if (d->perm[0])
+       gen = gen_vec_interleave_highv4df;
+      else
+       gen = gen_vec_interleave_lowv4df;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen (d->target, d->op0, d->op1));
+  return true;
+}
+
 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
    permutation with two pshufb insns and an ior.  We should have already
    failed all two instruction sequences.  */
@@ -35972,6 +36048,9 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_pshufb2 (d))
     return true;
 
+  if (expand_vec_perm_interleave3 (d))
+    return true;
+
   /* Try sequences of four instructions.  */
 
   if (expand_vec_perm_vpshufb2_vpermq (d))
index 8b07f9a877a6eeb7e25c2d13a49caa595e08f035..016eae2d371925a76b26405478b23aba2ce31e63 100644 (file)
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "vec_interleave_high<mode>"
+  [(match_operand:VI_256 0 "register_operand" "=x")
+   (match_operand:VI_256 1 "register_operand" "x")
+   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+  rtx t1 = gen_reg_rtx (<MODE>mode);
+  rtx t2 = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+  emit_insn (gen_avx2_interleave_high<mode> (t2,  operands[1], operands[2]));
+  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+                               gen_lowpart (V4DImode, t1),
+                               gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
+  DONE;
+})
+
+(define_expand "vec_interleave_low<mode>"
+  [(match_operand:VI_256 0 "register_operand" "=x")
+   (match_operand:VI_256 1 "register_operand" "x")
+   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ "TARGET_AVX2"
+{
+  rtx t1 = gen_reg_rtx (<MODE>mode);
+  rtx t2 = gen_reg_rtx (<MODE>mode);
+  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
+  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
+  emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
+                               gen_lowpart (V4DImode, t1),
+                               gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
+  DONE;
+})
+
 ;; Modes handled by pinsr patterns.
 (define_mode_iterator PINSR_MODE
   [(V16QI "TARGET_SSE4_1") V8HI