+2015-07-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * config/s390/s390.c (s390_rtx_costs): Make risbg patterns
+ cheaper.
+ (s390_expand_insv): Don't generate risbg pattern for constant zero
+ sources.
+ * config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
+ ("*insv<mode>_z10_appendbitsleft"): New pattern definitions. New
+ splitters.
+
2015-07-24 Dominik Vogt <vogt@linux.vnet.ibm.com>
* config/s390/s390.c (s390_reorg): Clean up handling of processors
*total = 0;
return true;
+ case IOR:
+ /* A single risbg can implement (x & ((1 << s) - 1)) | (y << s).  */
+ if (GET_CODE (XEXP (x, 0)) == AND
+ && GET_CODE (XEXP (x, 1)) == ASHIFT
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && REG_P (XEXP (XEXP (x, 1), 0))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+ && (UINTVAL (XEXP (XEXP (x, 0), 1))
+     == (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
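+ /* Illustration (an assumed example of the combined RTL, not part
+    of the original patch): with mask 31 and shift count 5 the IOR
+    looks like
+      (ior (and (reg x) (const_int 31))
+           (ashift (reg y) (const_int 5)))
+    and 31 == (1 << 5) - 1 holds, so the whole expression is costed
+    at COSTS_N_INSNS (2) above.  */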
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
case ROTATE:
case ROTATERT:
case AND:
- case IOR:
case XOR:
case NEG:
case NOT:
if (mode_s == VOIDmode)
{
- /* Assume const_int etc already in the proper mode. */
- src = force_reg (mode, src);
+ /* For constant zero sources the representation with AND gets
+    folded in more situations than a (set (zero_extract) ...)
+    would (for an example see gcc.target/s390/insv-3.c).
+    We only do this when start and end of the bitfield lie
+    within the same SImode word; that way nihf or nilf can be
+    used.
+    The AND patterns might still generate a risbg for this.  */
+ if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
+ return false;
+ else
+ src = force_reg (mode, src);
}
else if (mode_s != mode)
{
[(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
+; Implement appending y to the left of the s lowest bits of x:
+; x = (y << s) | (x & ((1 << s) - 1))
+(define_insn "*insv<mode>_zEC12_appendbitsleft"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+ (match_operand:GPR 2 "immediate_operand" ""))
+ (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+ (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+ "TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+ "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4"
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn "*insv<mode>_z10_appendbitsleft"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+ (match_operand:GPR 2 "immediate_operand" ""))
+ (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+ (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+ "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4"
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
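+
+; E.g. (sketch; cf. foo1 in gcc.target/s390/insv-1.c): with s = 5,
+;   unsigned long f (unsigned long x, unsigned long y)
+;   { return (y << 5) | (x & ((1UL << 5) - 1)); }
+; combines into the ior/and/ashift form above and is emitted as a
+; single risbg (risbgn on zEC12).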
+
+; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting
+; -> z = y >> d; z = (x << c) | (z & ((1 << c) - 1))
+; -> z = y >> d; z = risbg;
+
+(define_split
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+ (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+ (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+ (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+ (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+ "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+ [(set (match_dup 0)
+ (lshiftrt:GPR (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+ (ashift:GPR (match_dup 3) (match_dup 4))))]
+{
+ operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
+
+(define_split
+ [(parallel
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+ (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+ (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+ (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+ (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+ [(set (match_dup 0)
+ (lshiftrt:GPR (match_dup 1) (match_dup 2)))
+ (parallel
+ [(set (match_dup 0)
+ (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+ (ashift:GPR (match_dup 3) (match_dup 4))))
+ (clobber (reg:CC CC_REGNUM))])]
+{
+ operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
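+
+; E.g. (illustration): in DImode z = (x << 6) | (y >> 59) satisfies
+; 6 + 59 >= 64, so the two chunks cannot overlap.  The split first
+; emits z = y >> 59 and then z = (x << 6) | (z & 63), which the
+; appendbitsleft patterns above turn into a single risbg
+; (cf. foo3b in gcc.target/s390/insv-1.c).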
+
(define_insn "*r<noxa>sbg_<mode>_noshift"
[(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
(IXOR:GPR
+2015-07-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * gcc.target/s390/insv-1.c: New test.
+ * gcc.target/s390/insv-2.c: New test.
+ * gcc.target/s390/insv-3.c: New test.
+
2015-07-24 Tom de Vries <tom@codesourcery.com>
* gcc.dg/autopar/uns-outer-4.c: Remove loopfn xfail.
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-1.c
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTL than foo1.  The output reg (r2)
+   matches the unshifted argument, so it actually is a
+   (set (zero_extract a 59 0) b).  */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbg cannot be used when the mask keeps fewer bits than the
+   shift makes room for.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & 1);
+}
+
+/* risbg cannot be used when the bits kept by the mask overlap the
+   shifted value, since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & 127);
+}
+
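+/* The shifted chunks do not overlap (5 + 59 >= 64, resp. 5 + 27 >= 32
+   for the 31 bit case), so the splitter applies and a shift plus a
+   single risbg are expected.  */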
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 5) | (b >> 59);
+#else
+ return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 5) | (a >> 59);
+#else
+ return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbg can also be used if some zero bits are left between the two
+   chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 6) | (b >> 59);
+#else
+ return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 6) | (a >> 59);
+#else
+ return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so risbg must not be used.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 4) | (b >> 59);
+#else
+ return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 4) | (a >> 59);
+#else
+ return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbg" 6 } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-2.c
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTL than foo1.  The output reg (r2)
+   matches the unshifted argument, so it actually is a
+   (set (zero_extract a 59 0) b).  */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbgn cannot be used when the mask keeps fewer bits than the
+   shift makes room for.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & 1);
+}
+
+/* risbgn cannot be used when the bits kept by the mask overlap the
+   shifted value, since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+ return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+ return (b << 5) | (a & 127);
+}
+
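+/* The shifted chunks do not overlap (5 + 59 >= 64, resp. 5 + 27 >= 32
+   for the 31 bit case), so the splitter applies and a shift plus a
+   single risbgn are expected.  */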
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 5) | (b >> 59);
+#else
+ return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 5) | (a >> 59);
+#else
+ return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbgn can also be used if some zero bits are left between the two
+   chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 6) | (b >> 59);
+#else
+ return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 6) | (a >> 59);
+#else
+ return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so risbgn must not be used.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (a << 4) | (b >> 59);
+#else
+ return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+ return (b << 4) | (a >> 59);
+#else
+ return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbgn" 6 } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-3.c
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+/* risbg with the z bit set would work here, but we would rather have
+   a simple shift.  */
+struct
+{
+ int a:31;
+ int b:1;
+} s;
+
+void
+foo (int in)
+{
+ s.a = in;
+ s.b = 0;
+}
+
+/* { dg-final { scan-assembler-not "risbg" } } */