From: Richard Sandiford Date: Mon, 7 Oct 2019 08:36:06 +0000 (+0000) Subject: [i386] Make the vzeroupper pattern describe its effects (PR91994) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=698114484e668abf28c70d989bc93fa399dda9ac;p=gcc.git [i386] Make the vzeroupper pattern describe its effects (PR91994) The problem in this PR was that vzeroupper has an effect on register contents, but those effects weren't modelled in the rtl pattern, which was just an unspec_volatile. This patch fixes that by running a subpass after vzeroupper insertion to add SETs and CLOBBERs as appropriate. See the comments in the patch for more details. 2019-10-07 Richard Sandiford gcc/ PR target/91994 * config/i386/sse.md (avx_vzeroupper): Turn into a define_expand and wrap the unspec_volatile in a parallel. (*avx_vzeroupper): New define_insn. Use a match_parallel around the unspec_volatile. * config/i386/predicates.md (vzeroupper_pattern): Expect the unspec_volatile to be wrapped in a parallel. * config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper) (ix86_add_reg_usage_to_vzerouppers): New functions. (rest_of_handle_insert_vzeroupper): Use them to add register usage information to the vzeroupper instructions. gcc/testsuite/ PR target/91994 * gcc.target/i386/pr91994.c: New test. From-SVN: r276648 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ef1eb5973cc..ef8731fb3a0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2019-10-07 Richard Sandiford + + PR target/91994 + * config/i386/sse.md (avx_vzeroupper): Turn into a define_expand + and wrap the unspec_volatile in a parallel. + (*avx_vzeroupper): New define_insn. Use a match_parallel around + the unspec_volatile. + * config/i386/predicates.md (vzeroupper_pattern): Expect the + unspec_volatile to be wrapped in a parallel. + * config/i386/i386-features.c (ix86_add_reg_usage_to_vzeroupper) + (ix86_add_reg_usage_to_vzerouppers): New functions. + (rest_of_handle_insert_vzeroupper): Use them to add register + usage information to the vzeroupper instructions. + 2019-10-07 Richard Biener PR tree-optimization/91975 diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 9b297bac191..4781a33a545 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -1757,6 +1757,68 @@ convert_scalars_to_vector (bool timode_p) return 0; } +/* Modify the vzeroupper pattern in INSN so that it describes the effect + that the instruction has on the SSE registers. LIVE_REGS are the set + of registers that are live across the instruction. + + For a live register R we use: + + (set (reg:V2DF R) (reg:V2DF R)) + + which preserves the low 128 bits but clobbers the upper bits. + For a dead register we just use: + + (clobber (reg:V2DF R)) + + which invalidates any previous contents of R and stops R from becoming + live across the vzeroupper in future. */ + +static void +ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs) +{ + rtx pattern = PATTERN (insn); + unsigned int nregs = TARGET_64BIT ? 16 : 8; + rtvec vec = rtvec_alloc (nregs + 1); + RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0); + for (unsigned int i = 0; i < nregs; ++i) + { + unsigned int regno = GET_SSE_REGNO (i); + rtx reg = gen_rtx_REG (V2DImode, regno); + if (bitmap_bit_p (live_regs, regno)) + RTVEC_ELT (vec, i + 1) = gen_rtx_SET (reg, reg); + else + RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg); + } + XVEC (pattern, 0) = vec; + df_insn_rescan (insn); +} + +/* Walk the vzeroupper instructions in the function and annotate them + with the effect that they have on the SSE registers. */ + +static void +ix86_add_reg_usage_to_vzerouppers (void) +{ + basic_block bb; + rtx_insn *insn; + auto_bitmap live_regs; + + df_analyze (); + FOR_EACH_BB_FN (bb, cfun) + { + bitmap_copy (live_regs, df_get_live_out (bb)); + df_simulate_initialize_backwards (bb, live_regs); + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + if (vzeroupper_pattern (PATTERN (insn), VOIDmode)) + ix86_add_reg_usage_to_vzeroupper (insn, live_regs); + df_simulate_one_insn_backwards (bb, insn, live_regs); + } + } +} + static unsigned int rest_of_handle_insert_vzeroupper (void) { @@ -1773,6 +1835,7 @@ rest_of_handle_insert_vzeroupper (void) /* Call optimize_mode_switching. */ g->get_passes ()->execute_pass_mode_switching (); + ix86_add_reg_usage_to_vzerouppers (); return 0; } diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 72f8e7eae3c..31f1ceabc3a 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1441,8 +1441,9 @@ ;; return true if OP is a vzeroupper pattern. (define_predicate "vzeroupper_pattern" - (and (match_code "unspec_volatile") - (match_test "XINT (op, 1) == UNSPECV_VZEROUPPER"))) + (and (match_code "parallel") + (match_code "unspec_volatile" "a") + (match_test "XINT (XVECEXP (op, 0, 0), 1) == UNSPECV_VZEROUPPER"))) ;; Return true if OP is an addsub vec_merge operation (define_predicate "addsub_vm_operator" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c7f539fb88f..07922a1bf97 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19622,9 +19622,16 @@ (set_attr "mode" "OI")]) ;; Clear the upper 128bits of AVX registers, equivalent to a NOP -;; if the upper 128bits are unused. -(define_insn "avx_vzeroupper" - [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)] +;; if the upper 128bits are unused. Initially we expand the instructions +;; as though they had no effect on the SSE registers, but later add SETs and +;; CLOBBERs to the PARALLEL to model the real effect. +(define_expand "avx_vzeroupper" + [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] + "TARGET_AVX") + +(define_insn "*avx_vzeroupper" + [(match_parallel 0 "vzeroupper_pattern" + [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] "TARGET_AVX" "vzeroupper" [(set_attr "type" "sse") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e8e006061ec..01253be441d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-10-07 Richard Sandiford + + PR target/91994 + * gcc.target/i386/pr91994.c: New test. + 2019-10-07 Richard Biener PR tree-optimization/91975 diff --git a/gcc/testsuite/gcc.target/i386/pr91994.c b/gcc/testsuite/gcc.target/i386/pr91994.c new file mode 100644 index 00000000000..033be68e7d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91994.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx -mvzeroupper" } */ + +#include "avx-check.h" + +#include + +__m256i x1, x2, x3; + +__attribute__ ((noinline)) +static void +foo (void) +{ + x1 = x2; +} + +void +bar (void) +{ + __m256i x = x1; + foo (); + x3 = x; +} + +__attribute__ ((noinline)) +void +avx_test (void) +{ + __m256i x = _mm256_set1_epi8 (3); + x1 = x; + bar (); + if (__builtin_memcmp (&x3, &x, sizeof (x))) + __builtin_abort (); +}