re PR target/85328 (accessing ymm16 with non-avx512 instruction form)
authorJakub Jelinek <jakub@redhat.com>
Thu, 12 Apr 2018 11:17:23 +0000 (13:17 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Thu, 12 Apr 2018 11:17:23 +0000 (13:17 +0200)
PR target/85328
* config/i386/sse.md
(<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split,
<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split,
vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi,
vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg
and output is a reg, avoid creating invalid lowpart subreg, but
instead split into a 512-bit move.  Don't split if not AVX512VL,
input is xmm16+ reg and output is a mem.
(vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi,
vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is
xmm16+ reg and output is a mem.

* gcc.target/i386/pr85328.c: New test.

From-SVN: r259344

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr85328.c [new file with mode: 0644]

index 5d10cbf8716d8b3778abaf36558a24ed6a8a2067..487d40170204ed08395b164a7a75d0d0fa1e60b5 100644 (file)
@@ -1,3 +1,18 @@
+2018-04-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/85328
+       * config/i386/sse.md
+       (<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name> split,
+       <mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name> split,
+       vec_extract_lo_<mode><mask_name> split, vec_extract_lo_v32hi,
+       vec_extract_lo_v64qi): For non-AVX512VL if input is xmm16+ reg
+       and output is a reg, avoid creating invalid lowpart subreg, but
+       instead split into a 512-bit move.  Don't split if not AVX512VL,
+       input is xmm16+ reg and output is a mem.
+       (vec_extract_lo_<mode><mask_name>, vec_extract_lo_v32hi,
+       vec_extract_lo_v64qi): Don't require split if not AVX512VL, input is
+       xmm16+ reg and output is a mem.
+
 2018-04-12  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
        * config/s390/s390.c (s390_output_indirect_thunk_function): Check
index 8c970e0cc8aa58b56fc467409155960352a1175e..c3345d0cfdd624de7055e6ed828bc8f7e1bfd9f6 100644 (file)
        (vec_select:<ssequartermode>
          (match_operand:V8FI 1 "register_operand")
          (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_AVX512DQ && reload_completed"
+  "TARGET_AVX512DQ
+   && reload_completed
+   && (TARGET_AVX512VL
+       || REG_P (operands[0])
+       || !EXT_REX_SSE_REG_P (operands[1]))"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+{
+  if (!TARGET_AVX512VL
+      && REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    operands[0]
+      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
+  else
+    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
+})
 
 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
   [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
          (match_operand:V16FI 1 "register_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
-  "TARGET_AVX512F && reload_completed"
+  "TARGET_AVX512F
+   && reload_completed
+   && (TARGET_AVX512VL
+       || REG_P (operands[0])
+       || !EXT_REX_SSE_REG_P (operands[1]))"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);")
+{
+  if (!TARGET_AVX512VL
+      && REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    operands[0]
+      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
+  else
+    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
+})
 
 (define_mode_attr extract_type_2
   [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
    && <mask_mode512bit_condition>
    && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
 {
-  if (<mask_applied>)
+  if (<mask_applied>
+      || (!TARGET_AVX512VL
+         && !REG_P (operands[0])
+         && EXT_REX_SSE_REG_P (operands[1])))
     return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
   else
     return "#";
            (const_int 4) (const_int 5)
            (const_int 6) (const_int 7)])))]
   "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && reload_completed"
+   && reload_completed
+   && (TARGET_AVX512VL
+       || REG_P (operands[0])
+       || !EXT_REX_SSE_REG_P (operands[1]))"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
+{
+  if (!TARGET_AVX512VL
+      && REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    operands[0]
+      = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
+  else
+    operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
+})
 
 (define_insn "vec_extract_lo_<mode><mask_name>"
   [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
   "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "#"
-  "&& reload_completed"
+{
+  if (TARGET_AVX512VL
+      || REG_P (operands[0])
+      || !EXT_REX_SSE_REG_P (operands[1]))
+    return "#";
+  else
+    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
+}
+  "&& reload_completed
+   && (TARGET_AVX512VL
+       || REG_P (operands[0])
+       || !EXT_REX_SSE_REG_P (operands[1]))"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_lowpart (V16HImode, operands[1]);")
+{
+  if (!TARGET_AVX512VL
+      && REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
+  else
+    operands[1] = gen_lowpart (V16HImode, operands[1]);
+})
 
 (define_insn "vec_extract_hi_v32hi"
   [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
                     (const_int 28) (const_int 29)
                     (const_int 30) (const_int 31)])))]
   "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "#"
-  "&& reload_completed"
+{
+  if (TARGET_AVX512VL
+      || REG_P (operands[0])
+      || !EXT_REX_SSE_REG_P (operands[1]))
+    return "#";
+  else
+    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
+}
+  "&& reload_completed
+   && (TARGET_AVX512VL
+       || REG_P (operands[0])
+       || !EXT_REX_SSE_REG_P (operands[1]))"
   [(set (match_dup 0) (match_dup 1))]
-  "operands[1] = gen_lowpart (V32QImode, operands[1]);")
+{
+  if (!TARGET_AVX512VL
+      && REG_P (operands[0])
+      && EXT_REX_SSE_REG_P (operands[1]))
+    operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
+  else
+    operands[1] = gen_lowpart (V32QImode, operands[1]);
+})
 
 (define_insn "vec_extract_hi_v64qi"
   [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
index d6cdfd4bc2893568e2c5fd052a49beef45d5259a..03099159db0d1c923ac2d85555b96bcf1abfa911 100644 (file)
@@ -1,3 +1,8 @@
+2018-04-12  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/85328
+       * gcc.target/i386/pr85328.c: New test.
+
 2018-04-12  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
        * gcc.target/s390/nobp-no-dwarf2-cfi.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr85328.c b/gcc/testsuite/gcc.target/i386/pr85328.c
new file mode 100644 (file)
index 0000000..987ea82
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR target/85328 */
+/* { dg-do assemble { target avx512f } } */
+/* { dg-options "-O3 -fno-caller-saves -mavx512f" } */
+
+typedef char U __attribute__((vector_size (64)));
+typedef int V __attribute__((vector_size (64)));
+U a, b;
+
+extern void bar (void);
+
+V
+foo (V f)
+{
+  b <<= (U){(V){}[63]} & 7;
+  bar ();
+  a = (U)f & 7;
+  return (V)b;
+}