From 3f156a6ce05f9e2b8cd37d1226c98d8b2be478ad Mon Sep 17 00:00:00 2001 From: Vladimir Makarov Date: Thu, 24 Nov 2016 19:54:27 +0000 Subject: [PATCH] re PR rtl-optimization/77541 (wrong code with 512bit vectors of int128 @ -O1) 2016-11-24 Vladimir Makarov PR rtl-optimization/77541 * lra-constraints.c (struct input_reload): Add field match_p. (get_reload_reg): Check modes of input reloads to generate unique value reload pseudo. (match_reload): Add input reload pseudo for the current insn. 2016-11-24 Vladimir Makarov PR rtl-optimization/77541 * gcc.target/i386/pr77541.c: New. From-SVN: r242848 --- gcc/ChangeLog | 8 +++ gcc/lra-constraints.c | 85 ++++++++++++++++--------- gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.target/i386/pr77541.c | 25 ++++++++ 4 files changed, 92 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr77541.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7fb4826bfd6..a20c48ac097 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2016-11-24 Vladimir Makarov + + PR rtl-optimization/77541 + * lra-constraints.c (struct input_reload): Add field match_p. + (get_reload_reg): Check modes of input reloads to generate unique + value reload pseudo. + (match_reload): Add input reload pseudo for the current insn. + 2016-11-24 James Greenhalgh * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 56b65ef81af..133b55ce0ba 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -529,6 +529,8 @@ init_curr_operand_mode (void) /* Structure describes input reload of the current insns. */ struct input_reload { + /* True for input reload of matched operands. */ + bool match_p; /* Reloaded value. */ rtx input; /* Reload pseudo used. */ @@ -563,6 +565,7 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, { int i, regno; enum reg_class new_class; + bool unique_p = false; if (type == OP_OUT) { @@ -574,39 +577,53 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx original, e.g. volatile memory. */ if (! side_effects_p (original)) for (i = 0; i < curr_insn_input_reloads_num; i++) - if (rtx_equal_p (curr_insn_input_reloads[i].input, original) - && in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class)) - { - rtx reg = curr_insn_input_reloads[i].reg; - regno = REGNO (reg); - /* If input is equal to original and both are VOIDmode, - GET_MODE (reg) might be still different from mode. - Ensure we don't return *result_reg with wrong mode. */ - if (GET_MODE (reg) != mode) - { - if (in_subreg_p) - continue; - if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode)) - continue; - reg = lowpart_subreg (mode, reg, GET_MODE (reg)); - if (reg == NULL_RTX || GET_CODE (reg) != SUBREG) - continue; - } - *result_reg = reg; - if (lra_dump_file != NULL) - { - fprintf (lra_dump_file, " Reuse r%d for reload ", regno); - dump_value_slim (lra_dump_file, original, 1); - } - if (new_class != lra_get_allocno_class (regno)) - lra_change_class (regno, new_class, ", change to", false); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, "\n"); - return false; - } - *result_reg = lra_create_new_reg (mode, original, rclass, title); + { + if (! curr_insn_input_reloads[i].match_p + && rtx_equal_p (curr_insn_input_reloads[i].input, original) + && in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class)) + { + rtx reg = curr_insn_input_reloads[i].reg; + regno = REGNO (reg); + /* If input is equal to original and both are VOIDmode, + GET_MODE (reg) might be still different from mode. + Ensure we don't return *result_reg with wrong mode. */ + if (GET_MODE (reg) != mode) + { + if (in_subreg_p) + continue; + if (GET_MODE_SIZE (GET_MODE (reg)) < GET_MODE_SIZE (mode)) + continue; + reg = lowpart_subreg (mode, reg, GET_MODE (reg)); + if (reg == NULL_RTX || GET_CODE (reg) != SUBREG) + continue; + } + *result_reg = reg; + if (lra_dump_file != NULL) + { + fprintf (lra_dump_file, " Reuse r%d for reload ", regno); + dump_value_slim (lra_dump_file, original, 1); + } + if (new_class != lra_get_allocno_class (regno)) + lra_change_class (regno, new_class, ", change to", false); + if (lra_dump_file != NULL) + fprintf (lra_dump_file, "\n"); + return false; + } + /* If we have an input reload with a different mode, make sure it + will get a different hard reg. */ + else if (REG_P (original) + && REG_P (curr_insn_input_reloads[i].input) + && REGNO (original) == REGNO (curr_insn_input_reloads[i].input) + && (GET_MODE (original) + != GET_MODE (curr_insn_input_reloads[i].input))) + unique_p = true; + } + *result_reg = (unique_p + ? lra_create_new_reg_with_unique_value + : lra_create_new_reg) (mode, original, rclass, title); lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS); curr_insn_input_reloads[curr_insn_input_reloads_num].input = original; + curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = false; curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = *result_reg; return true; } @@ -1002,6 +1019,12 @@ match_reload (signed char out, signed char *ins, signed char *outs, lra_emit_move (copy_rtx (new_in_reg), in_rtx); *before = get_insns (); end_sequence (); + /* Add the new pseudo to consider values of subsequent input reload + pseudos. */ + lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS); + curr_insn_input_reloads[curr_insn_input_reloads_num].input = in_rtx; + curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = true; + curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = new_in_reg; for (i = 0; (in = ins[i]) >= 0; i++) { lra_assert diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 144cd3cbff0..0994787853c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-11-24 Vladimir Makarov + + PR rtl-optimization/77541 + * gcc.target/i386/pr77541.c: New. + 2016-11-24 Steven G. Kargl PR fortran/78500 diff --git a/gcc/testsuite/gcc.target/i386/pr77541.c b/gcc/testsuite/gcc.target/i386/pr77541.c new file mode 100644 index 00000000000..2c533bb3b7d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr77541.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -Wno-psabi" } */ + +#define MAGIC 0x0706050403020100 + +typedef unsigned long long u64; +typedef unsigned __int128 v64u128 __attribute__ ((vector_size (64))); + +v64u128 __attribute__ ((noinline, noclone)) +foo (u64 x1, u64 x2, u64 x3, u64 x4, v64u128 x5) +{ + (void)x1, (void)x2; + x4 >>= x4 & 63; + return x3 + x4 + x5; +} + +int +main () +{ + v64u128 x = foo (0, 0, 0, MAGIC, (v64u128) {}); + if (x[0] != MAGIC || x[1] != MAGIC || x[2] != MAGIC || x[3] != MAGIC) + __builtin_abort(); + return 0; +} -- 2.30.2