From 683be46fa65feb540990b6cfea729f4234d71298 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Wed, 12 Aug 2015 21:54:23 +0000 Subject: [PATCH] re PR target/67071 (GCC misses an optimization to load vector constants) [gcc] 2015-08-12 Michael Meissner PR target/67071 * config/rs6000/predicates.md (easy_vector_constant_vsldoi): New predicate to allow construction of vector constants using the VSLDOI vector shift instruction. * config/rs6000/rs6000-protos.h (vspltis_shifted): Add declaration. * config/rs6000/rs6000.c (vspltis_shifted): New function to return the number of bytes to be shifted left and filled in with either all zero or all one bits. (gen_easy_altivec_constant): Call vspltis_shifted if no other methods exist. (output_vec_const_move): On power8, generate XXLORC to generate a vector constant with all 1's. Do a split if we need to use a VSLDOI instruction. * config/rs6000/rs6000.h (EASY_VECTOR_MSB): Use mode mask to properly test for the MSB. * config/rs6000/altivec.md (VSLDOI splitter): Add splitter for vector constants that can be created with VSLDOI. [gcc/testsuite] 2015-08-12 Michael Meissner PR target/67071 * gcc.target/powerpc/pr67071-1.c: New file to test PR 67071 new vector constants. * gcc.target/powerpc/pr67071-2.c: Likewise. * gcc.target/powerpc/pr67071-3.c: Likewise. 
From-SVN: r226836 --- gcc/ChangeLog | 25 +++++ gcc/config/rs6000/altivec.md | 41 ++++++++ gcc/config/rs6000/predicates.md | 8 ++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 105 ++++++++++++++++++- gcc/config/rs6000/rs6000.h | 2 +- gcc/testsuite/ChangeLog | 8 ++ gcc/testsuite/gcc.target/powerpc/pr67071-1.c | 34 ++++++ gcc/testsuite/gcc.target/powerpc/pr67071-2.c | 48 +++++++++ gcc/testsuite/gcc.target/powerpc/pr67071-3.c | 48 +++++++++ 10 files changed, 317 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67071-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67071-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr67071-3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e968d2d6726..65a4f2a0012 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2015-08-12 Michael Meissner + + PR target/67071 + * config/rs6000/predicates.md (easy_vector_constant_vsldoi): New + predicate to allow construction of vector constants using the + VSLDOI vector shift instruction. + + * config/rs6000/rs6000-protos.h (vspltis_shifted): Add + declaration. + + * config/rs6000/rs6000.c (vspltis_shifted): New function to return + the number of bytes to be shifted left and filled in with either + all zero or all one bits. + (gen_easy_altivec_constant): Call vspltis_shifted if no other + methods exist. + (output_vec_const_move): On power8, generate XXLORC to generate + a vector constant with all 1's. Do a split if we need to use a + VSLDOI instruction. + + * config/rs6000/rs6000.h (EASY_VECTOR_MSB): Use mode mask to + properly test for the MSB. + + * config/rs6000/altivec.md (VSLDOI splitter): Add splitter for + vector constants that can be created with VSLDOI. 
+ 2015-08-11 Trevor Saunders revert: diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index a8ea0fbdfe1..4170f38b7db 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -311,6 +311,47 @@ operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]); }) +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_vsldoi" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) && can_create_pseudo_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) + (unspec:VM [(match_dup 2) + (match_dup 4) + (match_dup 6)] + UNSPEC_VSLDOI))] +{ + rtx op1 = operands[1]; + int elt = (BYTES_BIG_ENDIAN) ? 0 : GET_MODE_NUNITS (mode) - 1; + HOST_WIDE_INT val = const_vector_elt_as_int (op1, elt); + rtx rtx_val = GEN_INT (val); + int shift = vspltis_shifted (op1); + int nunits = GET_MODE_NUNITS (mode); + int i; + + gcc_assert (shift != 0); + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_rtx_CONST_VECTOR (mode, rtvec_alloc (nunits)); + operands[4] = gen_reg_rtx (mode); + + if (shift < 0) + { + operands[5] = CONSTM1_RTX (mode); + operands[6] = GEN_INT (-shift); + } + else + { + operands[5] = CONST0_RTX (mode); + operands[6] = GEN_INT (shift); + } + + /* Populate the constant vectors. */ + for (i = 0; i < nunits; i++) + XVECEXP (operands[3], 0, i) = rtx_val; +}) + (define_insn "get_vrsave_internal" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))] diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 5a6bb70c59f..ae74796849d 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -562,6 +562,14 @@ return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); }) +;; Return true if this is an easy altivec constant that we form +;; by using VSLDOI. 
+(define_predicate "easy_vector_constant_vsldoi" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (and (match_test "easy_altivec_constant (op, mode)") + (match_test "vspltis_shifted (op) != 0"))))) + ;; Return 1 if operand is constant zero (scalars and vectors). (define_predicate "zero_constant" (and (match_code "const_int,const_double,const_wide_int,const_vector") diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index f5d34766990..7262a151438 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int, #endif /* TREE_CODE */ extern bool easy_altivec_constant (rtx, machine_mode); +extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); extern int num_insns_constant (rtx, machine_mode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e37ef9fdad5..d1326eba47d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5449,6 +5449,96 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) return true; } +/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI + instruction, filling in the bottom elements with 0 or -1. + + Return 0 if the constant cannot be generated with VSLDOI. Return positive + for the number of zeroes to shift in, or negative for the number of 0xff + bytes to shift in. + + OP is a CONST_VECTOR. */ + +int +vspltis_shifted (rtx op) +{ + machine_mode mode = GET_MODE (op); + machine_mode inner = GET_MODE_INNER (mode); + + unsigned i, j; + unsigned nunits; + unsigned mask; + + HOST_WIDE_INT val; + + if (mode != V16QImode && mode != V8HImode && mode != V4SImode) + return false; + + /* We need to create pseudo registers to do the shift, so don't recognize + shift vector constants after reload. 
*/ + if (!can_create_pseudo_p ()) + return false; + + nunits = GET_MODE_NUNITS (mode); + mask = GET_MODE_MASK (inner); + + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); + + /* Check if the value can really be the operand of a vspltis[bhw]. */ + if (EASY_VECTOR_15 (val)) + ; + + /* Also check if we are loading up the most significant bit which can be done + by loading up -1 and shifting the value left by -1. */ + else if (EASY_VECTOR_MSB (val, inner)) + ; + + else + return 0; + + /* Check if VAL is present in every element until we find elements + that are 0 or all 1 bits. */ + for (i = 1; i < nunits; ++i) + { + unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; + HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); + + /* If the value isn't the splat value, check for the remaining elements + being 0/-1. */ + if (val != elt_val) + { + if (elt_val == 0) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if (const_vector_elt_as_int (op, elt2) != 0) + return 0; + } + + return (nunits - i) * GET_MODE_SIZE (inner); + } + + else if ((elt_val & mask) == mask) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if ((const_vector_elt_as_int (op, elt2) & mask) != mask) + return 0; + } + + return -((nunits - i) * GET_MODE_SIZE (inner)); + } + + else + return 0; + } + } + + /* If all elements are equal, we don't need to do VSLDOI. */ + return 0; +} + /* Return true if OP is of the given MODE and can be synthesized with a vspltisb, vspltish or vspltisw. 
*/ @@ -5513,6 +5603,9 @@ easy_altivec_constant (rtx op, machine_mode mode) if (vspltis_constant (op, step, copies)) return true; + if (vspltis_shifted (op) != 0) + return true; + return false; } @@ -5556,7 +5649,7 @@ gen_easy_altivec_constant (rtx op) const char * output_vec_const_move (rtx *operands) { - int cst, cst2; + int cst, cst2, shift; machine_mode mode; rtx dest, vec; @@ -5569,10 +5662,13 @@ output_vec_const_move (rtx *operands) if (zero_constant (vec, mode)) return "xxlxor %x0,%x0,%x0"; + if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode)) + return "xxlorc %x0,%x0,%x0"; + if ((mode == V2DImode || mode == V1TImode) && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1) - return "vspltisw %0,-1"; + return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1"; } if (TARGET_ALTIVEC) @@ -5581,6 +5677,11 @@ output_vec_const_move (rtx *operands) if (zero_constant (vec, mode)) return "vxor %0,%0,%0"; + /* Do we need to construct a value using VSLDOI? */ + shift = vspltis_shifted (vec); + if (shift != 0) + return "#"; + splat_vec = gen_easy_altivec_constant (vec); gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); operands[1] = XEXP (splat_vec, 0); diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 102bbce1525..e4f29373252 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1980,7 +1980,7 @@ typedef struct rs6000_args && ((n) & 1) == 0) #define EASY_VECTOR_MSB(n,mode) \ - (((unsigned HOST_WIDE_INT)n) == \ + ((((unsigned HOST_WIDE_INT) (n)) & GET_MODE_MASK (mode)) == \ ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0a8679bf8a5..20735b946b6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2015-08-12 Michael Meissner + + PR target/67071 + * gcc.target/powerpc/pr67071-1.c: New file to test PR 67071 new + vector constants. 
+ * gcc.target/powerpc/pr67071-2.c: Likewise. + * gcc.target/powerpc/pr67071-3.c: Likewise. + 2015-08-12 Marek Polacek PR c++/55095 diff --git a/gcc/testsuite/gcc.target/powerpc/pr67071-1.c b/gcc/testsuite/gcc.target/powerpc/pr67071-1.c new file mode 100644 index 00000000000..c50b4e55f7b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr67071-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + +vector unsigned char +foo_char (void) +{ + return (vector unsigned char) { + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + }; +} + +vector unsigned short +foo_short (void) +{ + return (vector unsigned short) { + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { + 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, + }; +} + +/* { dg-final { scan-assembler-times "vspltisw" 3 } } */ +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr67071-2.c b/gcc/testsuite/gcc.target/powerpc/pr67071-2.c new file mode 100644 index 00000000000..9c34b3e799f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr67071-2.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + +vector unsigned char +foo_char (void) +{ + return (vector unsigned char) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +#else + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 +#endif + }; +} + +vector unsigned short +foo_short (void) +{ + return (vector unsigned short) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +#else + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8000 +#endif + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u, +#else + 0x00000000u, 0x00000000u, 0x00000000u, 0x80000000u, +#endif + }; +} + +/* { dg-final { scan-assembler-times "vspltisw" 3 } } */ +/* { dg-final { scan-assembler-times "vsldoi" 3 } } */ +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr67071-3.c b/gcc/testsuite/gcc.target/powerpc/pr67071-3.c new file mode 100644 index 00000000000..470afb787a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr67071-3.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + + +vector unsigned char +foo_char (void) +{ + return (vector unsigned char) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +#else + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x80, 0x80 +#endif + }; +} + +vector unsigned short +foo_short (void) +{ + return (vector unsigned short) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x8000, 0x8000, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff +#else + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8000, 0x8000 +#endif + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80000000u, 0x80000000u, 0xffffffffu, 0xffffffffu, +#else + 0xffffffffu, 0xffffffffu, 0x80000000u, 0x80000000u, +#endif + }; +} + +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ +/* { dg-final { scan-assembler-times "vsldoi" 3 } } */ -- 2.30.2