--- gcc/ChangeLog
+++ gcc/ChangeLog
+2019-10-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR target/91994
+	* config/i386/sse.md (avx_vzeroupper): Turn into a define_expand
+	and wrap the unspec_volatile in a parallel.
+	(*avx_vzeroupper): New define_insn.  Use a match_parallel around
+	the unspec_volatile.
+	* config/i386/predicates.md (vzeroupper_pattern): Expect the
+	unspec_volatile to be wrapped in a parallel.
+	* config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper)
+	(ix86_add_reg_usage_to_vzerouppers): New functions.
+	(rest_of_handle_insert_vzeroupper): Use them to add register
+	usage information to the vzeroupper instructions.
+
2019-10-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91975
--- gcc/config/i386/i386-features.c
+++ gcc/config/i386/i386-features.c
   return 0;
}
+/* Modify the vzeroupper pattern in INSN so that it describes the effect
+ that the instruction has on the SSE registers. LIVE_REGS are the set
+ of registers that are live across the instruction.
+
+ For a live register R we use:
+
+ (set (reg:V2DI R) (reg:V2DI R))
+
+ which preserves the low 128 bits but clobbers the upper bits.
+ For a dead register we just use:
+
+ (clobber (reg:V2DI R))
+
+ which invalidates any previous contents of R and stops R from becoming
+ live across the vzeroupper in future. */
+
+static void
+ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
+{
+ rtx pattern = PATTERN (insn);
+ unsigned int nregs = TARGET_64BIT ? 16 : 8;
+ rtvec vec = rtvec_alloc (nregs + 1);
+ RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
+ for (unsigned int i = 0; i < nregs; ++i)
+ {
+ unsigned int regno = GET_SSE_REGNO (i);
+ rtx reg = gen_rtx_REG (V2DImode, regno);
+ if (bitmap_bit_p (live_regs, regno))
+ RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg);
+ else
+ RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
+ }
+ XVEC (pattern, 0) = vec;
+ df_insn_rescan (insn);
+}
+
+/* Walk the vzeroupper instructions in the function and annotate them
+ with the effect that they have on the SSE registers. */
+
+static void
+ix86_add_reg_usage_to_vzerouppers (void)
+{
+ basic_block bb;
+ rtx_insn *insn;
+ auto_bitmap live_regs;
+
+ df_analyze ();
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ bitmap_copy (live_regs, df_get_live_out (bb));
+ df_simulate_initialize_backwards (bb, live_regs);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+ if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
+ ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
+ df_simulate_one_insn_backwards (bb, insn, live_regs);
+ }
+ }
+}
+
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
/* Call optimize_mode_switching. */
g->get_passes ()->execute_pass_mode_switching ();
+ ix86_add_reg_usage_to_vzerouppers ();
return 0;
}
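
For illustration (this is editorial, not part of the patch): in 32-bit code,
where nregs is 8, a vzeroupper across which only %xmm0 is live would end up
looking something like the following, with hard register numbers as they
appear in RTL dumps:

   (parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
              (set (reg:V2DI 20 xmm0) (reg:V2DI 20 xmm0))
              (clobber (reg:V2DI 21 xmm1))
              (clobber (reg:V2DI 22 xmm2))
              ...
              (clobber (reg:V2DI 27 xmm7))])

The self-set records that xmm0 is both used and (partially) redefined, so its
low half survives, while the clobbers stop the dead registers from being
treated as live across the instruction.
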
--- gcc/config/i386/predicates.md
+++ gcc/config/i386/predicates.md
;; Return true if OP is a vzeroupper pattern.
(define_predicate "vzeroupper_pattern"
- (and (match_code "unspec_volatile")
- (match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))
+ (and (match_code "parallel")
+ (match_code "unspec_volatile" "a")
+ (match_test "XINT (XVECEXP (op, 0, 0), 1) == UNSPECV_VZEROUPPER")))
;; Return true if OP is an addsub vec_merge operation.
(define_predicate "addsub_vm_operator"
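
A side note on the predicate syntax, for readers who have not met match_code
paths before: the second argument "a" directs the code check at
XVECEXP (op, 0, 0), the first element of the PARALLEL, rather than at op
itself.  The predicate is therefore roughly equivalent to the hand-written
condition:

   GET_CODE (op) == PARALLEL
   && GET_CODE (XVECEXP (op, 0, 0)) == UNSPEC_VOLATILE
   && XINT (XVECEXP (op, 0, 0), 1) == UNSPECV_VZEROUPPER
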
--- gcc/config/i386/sse.md
+++ gcc/config/i386/sse.md
(set_attr "mode" "OI")])
;; Clear the upper 128bits of AVX registers, equivalent to a NOP
-;; if the upper 128bits are unused.
-(define_insn "avx_vzeroupper"
- [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
+;; if the upper 128bits are unused. Initially we expand the instructions
+;; as though they had no effect on the SSE registers, but later add SETs and
+;; CLOBBERs to the PARALLEL to model the real effect.
+(define_expand "avx_vzeroupper"
+ [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
+ "TARGET_AVX")
+
+(define_insn "*avx_vzeroupper"
+ [(match_parallel 0 "vzeroupper_pattern"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
"TARGET_AVX"
"vzeroupper"
[(set_attr "type" "sse")
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
+2019-10-07  Richard Sandiford  <richard.sandiford@arm.com>
+
+	PR target/91994
+	* gcc.target/i386/pr91994.c: New test.
+
2019-10-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91975
--- /dev/null
+++ gcc/testsuite/gcc.target/i386/pr91994.c
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx -mvzeroupper" } */
+
+#include "avx-check.h"
+
+#include <immintrin.h>
+
+__m256i x1, x2, x3;
+
+__attribute__ ((noinline))
+static void
+foo (void)
+{
+ x1 = x2;
+}
+
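+/* At -O2, -fipa-ra lets the register allocator see that foo clobbers
+   no xmm registers, so bar may keep x in a ymm register across the
+   call.  Before this fix, the vzeroupper emitted at the end of foo
+   did not describe its effect on the SSE registers, so the upper
+   half of x could be silently zeroed.  */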
+void
+bar (void)
+{
+ __m256i x = x1;
+ foo ();
+ x3 = x;
+}
+
+__attribute__ ((noinline))
+void
+avx_test (void)
+{
+ __m256i x = _mm256_set1_epi8 (3);
+ x1 = x;
+ bar ();
+ if (__builtin_memcmp (&x3, &x, sizeof (x)))
+ __builtin_abort ();
+}