From e0d98ffd14487b7105de3d151122144086faa1fa Mon Sep 17 00:00:00 2001
From: Kewen Lin <linkw@linux.ibm.com>
Date: Tue, 15 Sep 2020 22:32:55 -0500
Subject: [PATCH] rs6000: Remove useless insns fed into lvx/stvx [PR97019]

This patch extends the existing function find_alignment_op to check
that all definitions of base_reg are AND operations with the mask
-16B to force the alignment.  If they all satisfy this requirement,
it passes all the AND operations and their insns to the functions
recombine_lvx_pattern and recombine_stvx_pattern, which can then
remove all of the now-useless ANDs.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.
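As a rough illustration (a hand-written sketch rather than output
taken from the PR; the register numbers are made up), each such
useless AND shows up in the generated code as an rldicr clearing the
low four bits of the base register, even though lvx/stvx already
ignore those bits of the effective address:

	rldicr 9,9,0,59		# r9 &= -16: redundant, since lvx
				# ignores the low 4 bits of the EA
	lvx 0,0,9		# load VR0 from r9 & -16

Previously find_alignment_op gave up as soon as base_reg had more
than one reaching definition, so such rldicr insns survived whenever
the aligned base flowed in from several paths, as in the loops of
the new test case.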
gcc/ChangeLog:

	PR target/97019
	* config/rs6000/rs6000-p8swap.c (find_alignment_op): Adjust to
	support multiple definitions which are all AND operations with
	the mask -16B.
	(recombine_lvx_pattern): Adjust to handle multiple AND
	operations from find_alignment_op.
	(recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

	PR target/97019
	* gcc.target/powerpc/pr97019.c: New test.
---
 gcc/config/rs6000/rs6000-p8swap.c          | 147 +++++++++++++--------
 gcc/testsuite/gcc.target/powerpc/pr97019.c |  83 ++++++++++++
 2 files changed, 176 insertions(+), 54 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr97019.c

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 3d5dc7d8aae..fff1b08afa6 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -2095,11 +2095,15 @@ alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and add all definition insns into
+   AND_INSNS and their corresponding fully-expanded rtxes for the
+   masking operations into AND_OPS.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
+		   vec<rtx> *and_ops)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2111,19 +2115,28 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
 	continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-	break;
+      if (!base_def_link)
+	return false;
 
-      /* With stack-protector code enabled, and possibly in other
-	 circumstances, there may not be an associated insn for
-	 the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-	break;
+      while (base_def_link)
+	{
+	  /* With stack-protector code enabled, and possibly in other
+	     circumstances, there may not be an associated insn for
+	     the def.  */
+	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+	    return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-	break;
+	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+	  and_operation = alignment_mask (and_insn);
+
+	  /* Stop if we find any one which doesn't align.  */
+	  if (!and_operation)
+	    return false;
+
+	  and_insns->safe_push (and_insn);
+	  and_ops->safe_push (and_operation);
+	  base_def_link = base_def_link->next;
+	}
     }
 
   return and_operation;
 }
@@ -2143,11 +2156,14 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
       FOR_EACH_INSN_INFO_DEF (def, insn_info)
@@ -2168,25 +2184,35 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new LVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new LVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 
 	  SET_SRC (body) = mem;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-	  
+
 	  if (dump_file)
 	    fprintf (dump_file, "lvx opportunity found at %d\n",
 		     INSN_UID (insn));
@@ -2205,11 +2231,14 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2234,25 +2263,35 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new STVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new STVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 
 	  SET_SRC (body) = src_reg;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-	  
+
 	  if (dump_file)
 	    fprintf (dump_file, "stvx opportunity found at %d\n",
 		     INSN_UID (insn));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c
new file mode 100644
index 00000000000..81b1bda320f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,83 @@
+/* This issue can only exist on little-endian P8 targets, since
+   the built-in functions vec_ld/vec_st can use lxvd2x/stxvd2x
+   (P8 big-endian) or lxv/stxv (P9 and later) in some cases,
+   where the rldicr instructions fed to them are necessary.  */
+/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test that there are no useless instructions "rldicr x,y,0,59"
+   to align the addresses for lvx/stvx.  */
+
+extern int a, b, c;
+extern vector unsigned long long ev5, ev6, ev7, ev8;
+extern int dummy (vector unsigned long long);
+
+int test_vec_ld(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4, v9;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
+      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
+      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
+    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
+    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+int test_vec_st(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      __builtin_vec_st(v1, 16, (unsigned long long *)e);
+      __builtin_vec_st(v2, 32, (unsigned long long *)e);
+      __builtin_vec_st(v3, 48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    __builtin_vec_st(v5, 16, (unsigned long long *)e);
+    __builtin_vec_st(v6, 32, (unsigned long long *)e);
+    __builtin_vec_st(v7, 48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+/* { dg-final { scan-assembler-not {(?n)rldicr.*,0,59} } } */
-- 
2.30.2