+2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * config/s390/constraints.md: Add comments.
+ (jKK): Reject element sizes > 8 bytes.
+ * config/s390/s390.c (s390_split_ok_p): Enable splitting also for
+ s_operands.
+ * config/s390/s390.md: Add the s_operand checks formerly in
+ s390_split_ok_p to various splitters where they are still
+ required.
+ * config/s390/vector.md ("mov<mode>" V_128): Add GPR alternatives
+ for 128 bit vectors. Plus two splitters.
+
2017-03-24 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/s390.md: Rename the cpu facilty vec to vx throughout
"All one bit scalar or vector constant"
(match_test "op == CONSTM1_RTX (GET_MODE (op))"))
+; vector generate mask operand - support for up to 64 bit elements
(define_constraint "jxx"
"@internal"
(and (match_code "const_vector")
(match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)")))
+; vector generate byte mask operand - this is only supposed to deal
+; with real vectors 128 bit values of being either 0 or -1 are handled
+; with j00 and jm1
(define_constraint "jyy"
"@internal"
(and (match_code "const_vector")
(match_test "s390_bytemask_vector_p (op, NULL)")))
+; vector replicate immediate operand - support for up to 64 bit elements
(define_constraint "jKK"
"@internal"
- (and (and (match_code "const_vector")
- (match_test "const_vec_duplicate_p (op)"))
+ (and (and (and (match_code "const_vector")
+ (match_test "const_vec_duplicate_p (op)"))
+ (match_test "GET_MODE_UNIT_SIZE (GET_MODE (op)) <= 8"))
(match_test "satisfies_constraint_K (XVECEXP (op, 0, 0))")))
(define_constraint "jm6"
if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
return false;
- /* We don't need to split if operands are directly accessible. */
- if (s_operand (src, mode) || s_operand (dst, mode))
- return false;
-
/* Non-offsettable memory references cannot be split. */
if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
|| (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
[(set (match_operand:TI 0 "nonimmediate_operand" "")
(match_operand:TI 1 "general_operand" ""))]
"TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], TImode)
+ && !s_operand (operands[1], TImode)
&& s390_split_ok_p (operands[0], operands[1], TImode, 0)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:TI 0 "nonimmediate_operand" "")
(match_operand:TI 1 "general_operand" ""))]
"TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], TImode)
+ && !s_operand (operands[1], TImode)
&& s390_split_ok_p (operands[0], operands[1], TImode, 1)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:DI 0 "nonimmediate_operand" "")
(match_operand:DI 1 "general_operand" ""))]
"!TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], DImode)
+ && !s_operand (operands[1], DImode)
&& s390_split_ok_p (operands[0], operands[1], DImode, 0)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:DI 0 "nonimmediate_operand" "")
(match_operand:DI 1 "general_operand" ""))]
"!TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], DImode)
+ && !s_operand (operands[1], DImode)
&& s390_split_ok_p (operands[0], operands[1], DImode, 1)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
(match_operand:TD_TF 1 "general_operand" ""))]
"TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], <MODE>mode)
+ && !s_operand (operands[1], <MODE>mode)
&& s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:TD_TF 0 "nonimmediate_operand" "")
(match_operand:TD_TF 1 "general_operand" ""))]
"TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], <MODE>mode)
+ && !s_operand (operands[1], <MODE>mode)
&& s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
(match_operand:DD_DF 1 "general_operand" ""))]
"!TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], <MODE>mode)
+ && !s_operand (operands[1], <MODE>mode)
&& s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
[(set (match_operand:DD_DF 0 "nonimmediate_operand" "")
(match_operand:DD_DF 1 "general_operand" ""))]
"!TARGET_ZARCH && reload_completed
+ && !s_operand (operands[0], <MODE>mode)
+ && !s_operand (operands[1], <MODE>mode)
&& s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
(include "vx-builtins.md")
; Full HW vector size moves
+
+; We don't use lm/stm for 128 bit moves since these are slower than
+; splitting it into separate moves.
+
+; FIXME: More constants are possible by enabling jxx, jyy constraints
+; for TImode (use double-int for the calculations)
+
; vgmb, vgmh, vgmf, vgmg, vrepib, vrepih, vrepif, vrepig
(define_insn "mov<mode>"
- [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R, v, v, v, v, v,v,d")
- (match_operand:V_128 1 "general_operand" " v,R,v,j00,jm1,jyy,jxx,jKK,d,v"))]
- "TARGET_VX"
+ [(set (match_operand:V_128 0 "nonimmediate_operand" "=v,v,R, v, v, v, v, v,v,*d,*d,?o")
+ (match_operand:V_128 1 "general_operand" " v,R,v,j00,jm1,jyy,jxx,jKK,d, v,dT,*d"))]
+ ""
"@
vlr\t%v0,%v1
vl\t%v0,%1
vgm<bhfgq>\t%v0,%s1,%e1
vrepi<bhfgq>\t%v0,%h1
vlvgp\t%v0,%1,%N1
+ #
+ #
#"
- [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*")])
+ [(set_attr "cpu_facility" "vx,vx,vx,vx,vx,vx,vx,vx,vx,vx,*,*")
+ (set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRI,VRR,*,*,*")])
+; VR -> GPR, no instruction so split it into 64 element sets.
(define_split
[(set (match_operand:V_128 0 "register_operand" "")
(match_operand:V_128 1 "register_operand" ""))]
operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
})
+; Split the 128 bit GPR move into two word mode moves
+; s390_split_ok_p decides which part needs to be moved first.
+
+(define_split
+ [(set (match_operand:V_128 0 "nonimmediate_operand" "")
+ (match_operand:V_128 1 "general_operand" ""))]
+ "reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 0)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 0, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 1, 0, <MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:V_128 0 "nonimmediate_operand" "")
+ (match_operand:V_128 1 "general_operand" ""))]
+ "reload_completed
+ && s390_split_ok_p (operands[0], operands[1], <MODE>mode, 1)"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+{
+ operands[2] = operand_subword (operands[0], 1, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[4] = operand_subword (operands[1], 1, 0, <MODE>mode);
+ operands[5] = operand_subword (operands[1], 0, 0, <MODE>mode);
+})
+
+
; Moves for smaller vector modes.
; In these patterns only the vlr, vone, and vzero instructions write