From: Bill Schmidt Date: Thu, 10 Sep 2015 20:22:37 +0000 (+0000) Subject: rs6000.c (swap_web_entry): Update preceding commentary to simplify permute mask adjus... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9a21c05d8d2a58a23782dbcedb4aa6df62282aa0;p=gcc.git rs6000.c (swap_web_entry): Update preceding commentary to simplify permute mask adjustment equation. [gcc] 2015-09-10 Bill Schmidt * config/rs6000/rs6000.c (swap_web_entry): Update preceding commentary to simplify permute mask adjustment equation. (special_handling_values): Add SH_VPERM. (const_load_sequence_p): New function. (insn_is_swappable_p): Add logic to recognize an UNSPEC_VPERM with the mask loaded from the constant pool. (adjust_vperm): New function. (handle_special_swappables): Call adjust_vperm. (dump_swap_insn_table): Handle SH_VPERM. [gcc/testsuite] 2015-09-10 Bill Schmidt * gcc.target/powerpc/swaps-p8-20.c: New test. * gcc.target/powerpc/swaps-p8-21.c: New test. From-SVN: r227664 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d47b1465953..7f15326d40a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2015-09-10 Bill Schmidt + + * config/rs6000/rs6000.c (swap_web_entry): Update preceding + commentary to simplify permute mask adjustment equation. + (special_handling_values): Add SH_VPERM. + (const_load_sequence_p): New function. + (insn_is_swappable_p): Add logic to recognize an UNSPEC_VPERM with + the mask loaded from the constant pool. + (adjust_vperm): New function. + (handle_special_swappables): Call adjust_vperm. + (dump_swap_insn_table): Handle SH_VPERM. + 2015-09-10 H.J. Lu * shrink-wrap.c (requires_stack_frame_p): Remove static. 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 941592af744..7278792d0dc 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -34943,10 +34943,8 @@ emit_fusion_gpr_load (rtx target, rtx mem) throughout the computation, we can get correct behavior by replacing M with M' as follows: - { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23] - M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31] - { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23] - { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31] + M'[i] = { (M[i]+8)%16 : M[i] in [0,15] + { ((M[i]+8)%16)+16 : M[i] in [16,31] This seems promising at first, since we are just replacing one mask with another. But certain masks are preferable to others. If M @@ -34964,7 +34962,11 @@ emit_fusion_gpr_load (rtx target, rtx mem) mask to be produced by an UNSPEC_LVSL, in which case the mask cannot be known at compile time. In such a case we would have to generate several instructions to compute M' as above at run time, - and a cost model is needed again. */ + and a cost model is needed again. + + However, when the mask M for an UNSPEC_VPERM is loaded from the + constant pool, we can replace M with M' as above at no cost + beyond adding a constant pool entry. */ /* This is based on the union-find logic in web.c. web_entry_base is defined in df.h. */ @@ -35016,7 +35018,8 @@ enum special_handling_values { SH_EXTRACT, SH_SPLAT, SH_XXPERMDI, - SH_CONCAT + SH_CONCAT, + SH_VPERM }; /* Union INSN with all insns containing definitions that reach USE. @@ -35151,6 +35154,64 @@ insn_is_swap_p (rtx insn) return 1; } +/* Return TRUE if insn is a swap fed by a load from the constant pool. */ +static bool +const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) +{ + unsigned uid = INSN_UID (insn); + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load) + return false; + + /* Find the unique use in the swap and locate its def. If the def + isn't unique, punt. 
*/ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return false; + + rtx def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap) + return false; + + rtx body = PATTERN (def_insn); + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != VEC_SELECT + || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM) + return false; + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + + df_ref base_use; + insn_info = DF_INSN_INFO_GET (def_insn); + FOR_EACH_INSN_INFO_USE (base_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) + continue; + + struct df_link *base_def_link = DF_REF_CHAIN (base_use); + if (!base_def_link || base_def_link->next) + return false; + + rtx tocrel_insn = DF_REF_INSN (base_def_link->ref); + rtx tocrel_body = PATTERN (tocrel_insn); + rtx base, offset; + if (GET_CODE (tocrel_body) != SET) + return false; + if (!toc_relative_expr_p (SET_SRC (tocrel_body), false)) + return false; + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base)) + return false; + } + } + return true; +} + /* Return 1 iff OP is an operand that will not be affected by having vector doublewords swapped in memory. */ static unsigned int @@ -35410,6 +35471,32 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, return 1; } + /* An UNSPEC_VPERM is ok if the mask operand is loaded from the + constant pool. 
*/ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == UNSPEC + && XINT (SET_SRC (body), 1) == UNSPEC_VPERM + && XVECLEN (SET_SRC (body), 0) == 3 + && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG) + { + rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2); + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + /* Punt if multiple definitions for this reg. */ + if (def_link && !def_link->next && + const_load_sequence_p (insn_entry, + DF_REF_INSN (def_link->ref))) + { + *special = SH_VPERM; + return 1; + } + } + } + /* Otherwise check the operands for vector lane violations. */ return rtx_is_swappable_p (body, special); } @@ -35742,6 +35829,105 @@ adjust_concat (rtx_insn *insn) fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); } +/* Given an UNSPEC_VPERM insn, modify the mask loaded from the + constant pool to reflect swapped doublewords. */ +static void +adjust_vperm (rtx_insn *insn) +{ + /* We previously determined that the UNSPEC_VPERM was fed by a + swap of a swapping load of a TOC-relative constant pool symbol. + Find the MEM in the swapping load and replace it with a MEM for + the adjusted mask constant. */ + rtx set = PATTERN (insn); + rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); + + /* Find the swap. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx_insn *swap_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + swap_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (swap_insn); + + /* Find the load. 
*/ + insn_info = DF_INSN_INFO_GET (swap_insn); + rtx_insn *load_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + load_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (load_insn); + + /* Find the TOC-relative symbol access. */ + insn_info = DF_INSN_INFO_GET (load_insn); + rtx_insn *tocrel_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + tocrel_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (tocrel_insn); + + /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p + to set tocrel_base; otherwise it would be unnecessary as we've + already established it will return true. */ + rtx base, offset; + if (!toc_relative_expr_p (SET_SRC (PATTERN (tocrel_insn)), false)) + gcc_unreachable (); + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + rtx const_vector = get_pool_constant (base); + gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); + + /* Create an adjusted mask from the initial mask. */ + unsigned int new_mask[16], i, val; + for (i = 0; i < 16; ++i) { + val = INTVAL (XVECEXP (const_vector, 0, i)); + if (val < 16) + new_mask[i] = (val + 8) % 16; + else + new_mask[i] = ((val + 8) % 16) + 16; + } + + /* Create a new CONST_VECTOR and a MEM that references it. */ + rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); + for (i = 0; i < 16; ++i) + XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]); + rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0)); + rtx new_mem = force_const_mem (V16QImode, new_const_vector); + /* This gives us a MEM whose base operand is a SYMBOL_REF, which we + can't recognize. Force the SYMBOL_REF into a register. 
*/ + if (!REG_P (XEXP (new_mem, 0))) { + rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); + XEXP (new_mem, 0) = base_reg; + /* Move the newly created insn ahead of the load insn. */ + rtx_insn *force_insn = get_last_insn (); + remove_insn (force_insn); + rtx_insn *before_load_insn = PREV_INSN (load_insn); + add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); + df_insn_rescan (before_load_insn); + df_insn_rescan (force_insn); + } + + /* Replace the MEM in the load instruction and rescan it. */ + XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; + INSN_CODE (load_insn) = -1; /* Force re-recognition. */ + df_insn_rescan (load_insn); + + if (dump_file) + fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); +} + /* The insn described by INSN_ENTRY[I] can be swapped, but only with special handling. Take care of that here. */ static void @@ -35796,6 +35982,10 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) /* Reverse the order of a concatenation operation. */ adjust_concat (insn); break; + case SH_VPERM: + /* Change the mask loaded from the constant pool for a VPERM. */ + adjust_vperm (insn); + break; } } @@ -35872,6 +36062,8 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("special:xxpermdi ", dump_file); else if (insn_entry[i].special_handling == SH_CONCAT) fputs ("special:concat ", dump_file); + else if (insn_entry[i].special_handling == SH_VPERM) + fputs ("special:vperm ", dump_file); } if (insn_entry[i].web_not_optimizable) fputs ("unoptimizable ", dump_file); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index db2f374987d..4772dc0d426 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-09-10 Bill Schmidt + + * gcc.target/powerpc/swaps-p8-20.c: New test. + * gcc.target/powerpc/swaps-p8-21.c: New test. + 2015-09-10 Steven G. 
Kargl PR fortran/67526 diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c new file mode 100644 index 00000000000..7463781281e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c @@ -0,0 +1,29 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O2 -mcpu=power8 -maltivec" } */ + +/* The expansion for vector character multiply introduces a vperm operation. + This tests that the swap optimization to remove swaps by changing the + vperm mask results in correct code. */ + +#include <altivec.h> + +void abort (); + +vector unsigned char r; +vector unsigned char v = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector unsigned char i = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; +vector unsigned char e = + {0, 2, 6, 12, 20, 30, 42, 56, 72, 90, 110, 132, 156, 182, 210, 240}; + +int main () +{ + int j; + r = v * i; + if (!vec_all_eq (r, e)) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-21.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-21.c new file mode 100644 index 00000000000..b981fcf2f36 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-21.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-O2 -mcpu=power8 -maltivec" } */ + +/* The expansion for vector character multiply introduces a vperm operation. + This tests that changing the vperm mask allows us to remove all swaps + from the generated code. 
*/ + +#include <altivec.h> + +void abort (); + +vector unsigned char r; +vector unsigned char v = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector unsigned char i = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + +int main () +{ + int j; + r = v * i; + return 0; +} + +/* { dg-final { scan-assembler-times "vperm" 1 } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */