2020-02-05  Jakub Jelinek  <jakub@redhat.com>

+	PR target/92190
+	* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper): Only
+	include sets and not clobbers in the vzeroupper pattern.
+	* config/i386/sse.md (*avx_vzeroupper): Require in insn condition that
+	the parallel has 17 (64-bit) or 9 (32-bit) elts.
+	(*avx_vzeroupper_1): New define_insn_and_split.
+
	PR target/92190
	* recog.c (pass_split_after_reload::gate): For STACK_REGS targets,
	don't run when !optimize.
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
     (set (reg:V2DF R) (reg:V2DF R))

-   which preserves the low 128 bits but clobbers the upper bits.
-   For a dead register we just use:
-
-     (clobber (reg:V2DF R))
-
-   which invalidates any previous contents of R and stops R from becoming
-   live across the vzeroupper in future.  */
+   which preserves the low 128 bits but clobbers the upper bits.  */
static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
  rtx pattern = PATTERN (insn);
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
-  rtvec vec = rtvec_alloc (nregs + 1);
-  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  unsigned int npats = nregs;
  for (unsigned int i = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+        npats--;
+    }
+  if (npats == 0)
+    return;
+  rtvec vec = rtvec_alloc (npats + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+  for (unsigned int i = 0, j = 0; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (!bitmap_bit_p (live_regs, regno))
+        continue;
      rtx reg = gen_rtx_REG (V2DImode, regno);
-      if (bitmap_bit_p (live_regs, regno))
-        RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
-      else
-        RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+      ++j;
+      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
    }
  XVEC (pattern, 0) = vec;
  df_insn_rescan (insn);
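
To make the new shape concrete: if, say, only xmm0 and xmm6 are live across a
particular vzeroupper, the rewritten pattern now carries just two sets next to
the unspec, along these lines (a sketch only; the hard register numbers that
GET_SSE_REGNO returns are elided):

  (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
             (set (reg:V2DI xmm0) (reg:V2DI xmm0))
             (set (reg:V2DI xmm6) (reg:V2DI xmm6))])

Dead registers no longer contribute (clobber ...) elements at this point;
their clobbers are reinstated only by the new splitter below.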
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
(define_insn "*avx_vzeroupper"
  [(match_parallel 0 "vzeroupper_pattern"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
-  "TARGET_AVX"
+  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
+(define_insn_and_split "*avx_vzeroupper_1"
+  [(match_parallel 0 "vzeroupper_pattern"
+     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
+  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
+  "#"
+  "&& epilogue_completed"
+  [(match_dup 0)]
+{
+  /* For IPA-RA purposes, make it clear the instruction clobbers
+     even XMM registers not mentioned explicitly in the pattern.  */
+  unsigned int nregs = TARGET_64BIT ? 16 : 8;
+  unsigned int npats = XVECLEN (operands[0], 0);
+  rtvec vec = rtvec_alloc (nregs + 1);
+  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
+  for (unsigned int i = 0, j = 1; i < nregs; ++i)
+    {
+      unsigned int regno = GET_SSE_REGNO (i);
+      if (j < npats
+          && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
+        {
+          RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
+          j++;
+        }
+      else
+        {
+          rtx reg = gen_rtx_REG (V2DImode, regno);
+          RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+        }
+    }
+  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
+}
+  [(set_attr "type" "sse")
+   (set_attr "modrm" "0")
+   (set_attr "memory" "none")
+   (set_attr "prefix" "vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "OI")])
+
(define_mode_attr pbroadcast_evex_isa
  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
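
Continuing the sketch from above: a short parallel like the two-set one fails
the XVECLEN check in *avx_vzeroupper, so it is matched by *avx_vzeroupper_1
instead and, once epilogue_completed, split back into the full-length form, on
64-bit roughly (register numbers again elided, middle elements abbreviated):

  (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
             (set (reg:V2DI xmm0) (reg:V2DI xmm0))
             (clobber (reg:V2DI xmm1))
             ...
             (set (reg:V2DI xmm6) (reg:V2DI xmm6))
             (clobber (reg:V2DI xmm7))
             ...
             (clobber (reg:V2DI xmm15))])

With its 17 elements this again satisfies *avx_vzeroupper's insn condition,
and IPA-RA sees an explicit clobber for every XMM register the instruction may
touch.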
+2020-02-05  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/92190
+	* gcc.target/i386/pr92190.c: New test.
+
2020-02-05  Richard Biener  <rguenther@suse.de>

	PR testsuite/92177
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92190.c
+/* PR target/92190 */
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
+/* { dg-options "-mabi=ms -O2 -mavx512f" } */
+
+typedef char VC __attribute__((vector_size (16)));
+typedef int VI __attribute__((vector_size (16 * sizeof 0)));
+VC a;
+VI b;
+void bar (VI);
+void baz (VC);
+
+void
+foo (void)
+{
+  VC k = a;
+  VI n = b;
+  bar (n);
+  baz (k);
+}
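
A note on why the test uses -mabi=ms: in the MS ABI the low 128 bits of
xmm6-xmm15 are call-saved, so k can survive the call to bar in one of those
registers, while the 512-bit copy of b dirties the upper register halves and
forces vzerouppers to be inserted before the calls. Assuming k is allocated to
xmm6 (a hypothetical assignment, for illustration only), the vzeroupper before
bar would now be represented as

  (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
             (set (reg:V2DI xmm6) (reg:V2DI xmm6))])

i.e. a *avx_vzeroupper_1 insn recording that the low half of xmm6 is
preserved, rather than a pattern that clobbers the register outright.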