From: Andreas Krebbel Date: Fri, 24 Jul 2015 11:28:06 +0000 (+0000) Subject: [PATCH] S/390: Improve risbg usage X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3d44ff9919d50db3a67bee88235bbdb9735ba34b;p=gcc.git [PATCH] S/390: Improve risbg usage gcc/ChangeLog: 2015-07-24 Andreas Krebbel * config/s390/s390.c (s390_rtx_costs): Make risbg patterns cheaper. (s390_expand_insv): Don't generate risbg pattern for constant zero sources. * config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft") ("*insv<mode>_z10_appendbitsleft"): New pattern definitions. New splitters. gcc/testsuite/ChangeLog: 2015-07-24 Andreas Krebbel * gcc.target/s390/insv-1.c: New test. * gcc.target/s390/insv-2.c: New test. * gcc.target/s390/insv-3.c: New test. From-SVN: r226148 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 00eab978555..f375a3a5943 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2015-07-24 Andreas Krebbel + + * config/s390/s390.c (s390_rtx_costs): Make risbg patterns + cheaper. + (s390_expand_insv): Don't generate risbg pattern for constant zero + sources. + * config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft") + ("*insv<mode>_z10_appendbitsleft"): New pattern definitions. New + splitters. 
+ 2015-07-24 Dominik Vogt * config/s390/s390.c (s390_reorg): Clean up handling of processors diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 17642435cbc..f421e884c66 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -3321,13 +3321,26 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = 0; return true; + case IOR: + /* risbg */ + if (GET_CODE (XEXP (x, 0)) == AND + && GET_CODE (XEXP (x, 1)) == ASHIFT + && REG_P (XEXP (XEXP (x, 0), 0)) + && REG_P (XEXP (XEXP (x, 1), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && (UINTVAL (XEXP (XEXP (x, 0), 1)) == + (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1)) + { + *total = COSTS_N_INSNS (2); + return true; + } case ASHIFT: case ASHIFTRT: case LSHIFTRT: case ROTATE: case ROTATERT: case AND: - case IOR: case XOR: case NEG: case NOT: @@ -5839,8 +5852,17 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) if (mode_s == VOIDmode) { - /* Assume const_int etc already in the proper mode. */ - src = force_reg (mode, src); + /* For constant zero values the representation with AND + appears to be folded in more situations than the (set + (zero_extract) ...). + We only do this when the start and end of the bitfield + remain in the same SImode chunk. That way nihf or nilf + can be used. + The AND patterns might still generate a risbg for this. 
*/ + if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32) + return false; + else + src = force_reg (mode, src); } else if (mode_s != mode) { diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 8c07d1bea57..b23973e4dd0 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -3776,6 +3776,71 @@ [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) +; Implement appending Y on the left of S bits of X +; x = (y << s) | (x & ((1 << s) - 1)) +(define_insn "*insv<mode>_zEC12_appendbitsleft" + [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") + (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0") + (match_operand:GPR 2 "immediate_operand" "")) + (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d") + (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))] + "TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1" + "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4" + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + +(define_insn "*insv<mode>_z10_appendbitsleft" + [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") + (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0") + (match_operand:GPR 2 "immediate_operand" "")) + (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d") + (match_operand:GPR 4 "nonzero_shift_count_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1" + "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4" + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + +; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting +; -> z = y >> d; z = (x << c) | (z & ((1 << c) - 1)) +; -> z = y >> d; z = risbg; + +(define_split + [(set (match_operand:GPR 0 "nonimmediate_operand" "") + (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "") + (match_operand:GPR 2 "nonzero_shift_count_operand" "")) + (ashift:GPR (match_operand:GPR 
3 "nonimmediate_operand" "") + (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))] + "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>" + [(set (match_dup 0) + (lshiftrt:GPR (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (ior:GPR (and:GPR (match_dup 0) (match_dup 5)) + (ashift:GPR (match_dup 3) (match_dup 4))))] +{ + operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1); +}) + +(define_split + [(parallel + [(set (match_operand:GPR 0 "nonimmediate_operand" "") + (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "") + (match_operand:GPR 2 "nonzero_shift_count_operand" "")) + (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "") + (match_operand:GPR 4 "nonzero_shift_count_operand" "")))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>" + [(set (match_dup 0) + (lshiftrt:GPR (match_dup 1) (match_dup 2))) + (parallel + [(set (match_dup 0) + (ior:GPR (and:GPR (match_dup 0) (match_dup 5)) + (ashift:GPR (match_dup 3) (match_dup 4)))) + (clobber (reg:CC CC_REGNUM))])] +{ + operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1); +}) + (define_insn "*r<noxa>sbg_<mode>_noshift" [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") (IXOR:GPR diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 72e7bf294d8..27278a195d6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-07-24 Andreas Krebbel + + * gcc.target/s390/insv-1.c: New test. + * gcc.target/s390/insv-2.c: New test. + * gcc.target/s390/insv-3.c: New test. + 2015-07-24 Tom de Vries * gcc.dg/autopar/uns-outer-4.c: Remove loopfn xfail. 
diff --git a/gcc/testsuite/gcc.target/s390/insv-1.c b/gcc/testsuite/gcc.target/s390/insv-1.c new file mode 100644 index 00000000000..e6c1b8bc544 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/insv-1.c @@ -0,0 +1,111 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10 -mzarch" } */ + +unsigned long +foo1 (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & (((1UL << 5) - 1))); +} + +/* This generates very different RTX than foo1. The output reg (r2) + matches the unshifted argument. So it actually is a + (set (zero_extract a 59 0) b) */ +unsigned long +foo2 (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & (((1UL << 5) - 1))); +} + +/* risbg cannot be used when less bits are removed with the mask. */ + +unsigned long +foo1b (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & 1); +} + +unsigned long +foo2b (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & 1); +} + +/* risbg cannot be used when the masked bits would end up in the + result since a real OR is required then. */ +unsigned long +foo1c (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & 127); +} + +unsigned long +foo2c (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & 127); +} + +unsigned long +foo3 (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 5) | (b >> 59); +#else + return (a << 5) | (b >> 27); +#endif +} + +unsigned long +foo4 (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 5) | (a >> 59); +#else + return (b << 5) | (a >> 27); +#endif +} + +/* risbg can be used also if there are some bits spared in the middle + of the two chunks. 
*/ +unsigned long +foo3b (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 6) | (b >> 59); +#else + return (a << 6) | (b >> 27); +#endif +} + +unsigned long +foo4b (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 6) | (a >> 59); +#else + return (b << 6) | (a >> 27); +#endif +} + +/* One bit of overlap so better don't use risbg. */ + +unsigned long +foo3c (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 4) | (b >> 59); +#else + return (a << 4) | (b >> 27); +#endif +} + +unsigned long +foo4c (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 4) | (a >> 59); +#else + return (b << 4) | (a >> 27); +#endif +} + +/* { dg-final { scan-assembler-times "risbg" 6 } } */ diff --git a/gcc/testsuite/gcc.target/s390/insv-2.c b/gcc/testsuite/gcc.target/s390/insv-2.c new file mode 100644 index 00000000000..2ba6d6c88ac --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/insv-2.c @@ -0,0 +1,111 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=zEC12 -mzarch" } */ + +unsigned long +foo1 (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & (((1UL << 5) - 1))); +} + +/* This generates very different RTX than foo1. The output reg (r2) + matches the unshifted argument. So it actually is a + (set (zero_extract a 59 0) b) */ +unsigned long +foo2 (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & (((1UL << 5) - 1))); +} + +/* risbgn cannot be used when less bits are removed with the mask. */ + +unsigned long +foo1b (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & 1); +} + +unsigned long +foo2b (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & 1); +} + +/* risbgn cannot be used when the masked bits would end up in the + result since a real OR is required then. 
*/ +unsigned long +foo1c (unsigned long a, unsigned long b) +{ + return (a << 5) | (b & 127); +} + +unsigned long +foo2c (unsigned long a, unsigned long b) +{ + return (b << 5) | (a & 127); +} + +unsigned long +foo3 (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 5) | (b >> 59); +#else + return (a << 5) | (b >> 27); +#endif +} + +unsigned long +foo4 (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 5) | (a >> 59); +#else + return (b << 5) | (a >> 27); +#endif +} + +/* risbgn can be used also if there are some bits spared in the middle + of the two chunks. */ +unsigned long +foo3b (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 6) | (b >> 59); +#else + return (a << 6) | (b >> 27); +#endif +} + +unsigned long +foo4b (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 6) | (a >> 59); +#else + return (b << 6) | (a >> 27); +#endif +} + +/* One bit of overlap so better don't use risbgn. */ + +unsigned long +foo3c (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (a << 4) | (b >> 59); +#else + return (a << 4) | (b >> 27); +#endif +} + +unsigned long +foo4c (unsigned long a, unsigned long b) +{ +#ifdef __s390x__ + return (b << 4) | (a >> 59); +#else + return (b << 4) | (a >> 27); +#endif +} + +/* { dg-final { scan-assembler-times "risbgn" 6 } } */ diff --git a/gcc/testsuite/gcc.target/s390/insv-3.c b/gcc/testsuite/gcc.target/s390/insv-3.c new file mode 100644 index 00000000000..0719750e12b --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/insv-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=z10 -mzarch" } */ + +/* risbg with z bit would work here but we rather want this to be a shift. */ +struct +{ + int a:31; + int b:1; +} s; + +void +foo (int in) +{ + s.a = in; + s.b = 0; +} + +/* { dg-final { scan-assembler-not "risbg" } } */