i386: Better patch to improve avx* vector concatenation [PR93594]
author Jakub Jelinek <jakub@redhat.com>
Fri, 7 Feb 2020 08:28:39 +0000 (09:28 +0100)
committer Jakub Jelinek <jakub@redhat.com>
Fri, 7 Feb 2020 08:28:39 +0000 (09:28 +0100)
After thinking some more on this, we can do better; rather than having to
add a new prereload splitter pattern to catch all other cases where it might
be beneficial to fold first part of an UNSPEC_CAST back to the unspec
operand, this patch reverts the *.md changes I made yesterday and instead
tweaks the patterns, so that simplify-rtx.c can optimize those on its own.
Instead of the whole SET_SRC being an UNSPEC through which simplify-rtx.c
obviously can't optimize anything, this represents those patterns through a
VEC_CONCAT (or two nested ones for the 128-bit -> 512-bit casts) with the
operand as the low part of it and UNSPEC representing just the high part of
it (the undefined, to be ignored, bits).  While richi suggested representing
those already in GIMPLE using a SSA_NAME default definition (i.e. a
clearly uninitialized use), I'd say that the uninit pass would warn about those,
but more importantly, in RTL it would probably force zero initialization of
that or use of an uninitialized pseudo, all of which is hard to match in a
pattern, so I think an UNSPEC is better for that.

2020-02-07  Jakub Jelinek  <jakub@redhat.com>

PR target/93594
* config/i386/predicates.md (avx_identity_operand): Remove.
* config/i386/sse.md (*avx_vec_concat<mode>_1): Remove.
(avx_<castmode><avxsizesuffix>_<castmode>,
avx512f_<castmode><avxsizesuffix>_256<castmode>): Change patterns to
a VEC_CONCAT of the operand and UNSPEC_CAST.
(avx512f_<castmode><avxsizesuffix>_<castmode>): Change pattern to
a VEC_CONCAT of VEC_CONCAT of the operand and UNSPEC_CAST with
UNSPEC_CAST.

gcc/ChangeLog
gcc/config/i386/predicates.md
gcc/config/i386/sse.md

index 56090a9e0b6ef0e3466f5c54f65a72338fb6c4e8..e56eaba7327fcd6a682bf607b198eb9540056818 100644 (file)
@@ -1,5 +1,15 @@
 2020-02-07  Jakub Jelinek  <jakub@redhat.com>
 
+       PR target/93594
+       * config/i386/predicates.md (avx_identity_operand): Remove.
+       * config/i386/sse.md (*avx_vec_concat<mode>_1): Remove.
+       (avx_<castmode><avxsizesuffix>_<castmode>,
+       avx512f_<castmode><avxsizesuffix>_256<castmode>): Change patterns to
+       a VEC_CONCAT of the operand and UNSPEC_CAST.
+       (avx512f_<castmode><avxsizesuffix>_<castmode>): Change pattern to
+       a VEC_CONCAT of VEC_CONCAT of the operand and UNSPEC_CAST with
+       UNSPEC_CAST.
+
        PR target/93611
        * config/i386/i386.c (ix86_lea_outperforms): Make sure to clear
        recog_data.insn if distance_non_agu_define changed it.
index 3ab9da45ffbba3c4fc1e03212397081d59e813fc..1119366d54eaaf5d374ffe40029837fc8eb5c943 100644 (file)
   return true;
 })
 
-;; Return true if OP is a parallel for identity permute.
-(define_predicate "avx_identity_operand"
-  (and (match_code "parallel")
-       (match_code "const_int" "a"))
-{
-  int i, nelt = XVECLEN (op, 0);
-
-  for (i = 0; i < nelt; ++i)
-    if (INTVAL (XVECEXP (op, 0, i)) != i)
-      return false;
-  return true;
-})
-
 ;; Return true if OP is a proper third operand to vpblendw256.
 (define_predicate "avx2_pblendw_operand"
   (match_code "const_int")
index cfd79a8354469097af79eab539ea72d35bb9f2b9..8879a65a7fdf641e3bbd4ec3c6b7c3322008459c 100644 (file)
 
 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
   [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
-       (unspec:AVX256MODE2P
-         [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
-         UNSPEC_CAST))]
+       (vec_concat:AVX256MODE2P
+         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
+         (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
   "&& reload_completed"
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn_and_split "*avx_vec_concat<mode>_1"
-  [(set (match_operand:V_256_512 0 "register_operand")
-       (vec_concat:V_256_512
-         (vec_select:<ssehalfvecmode>
-           (unspec:V_256_512
-             [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand")]
-             UNSPEC_CAST)
-           (match_parallel 3 "avx_identity_operand"
-             [(match_operand 4 "const_int_operand")]))
-         (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand")))]
-  "TARGET_AVX
-   && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
-       || !MEM_P (operands[1]))
-   && ix86_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(set (match_dup 0) (vec_concat:V_256_512 (match_dup 1) (match_dup 2)))])
-
 (define_insn "vcvtph2ps<mask_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_select:V4SF
 
 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
   [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
-       (unspec:AVX512MODE2P
-         [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
-         UNSPEC_CAST))]
+       (vec_concat:AVX512MODE2P
+         (vec_concat:<ssehalfvecmode>
+           (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
+           (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
+         (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
   "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
   "&& reload_completed"
 
 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
   [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
-       (unspec:AVX512MODE2P
-         [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
-         UNSPEC_CAST))]
+       (vec_concat:AVX512MODE2P
+         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
+         (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
   "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
   "&& reload_completed"