+2015-05-19 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
+
+ * config/s390/constraints.md (j00, jm1, jxx, jyy, v): New
+ constraints.
+ * config/s390/predicates.md (const0_operand, constm1_operand)
+ (constable_operand): Accept vector operands.
+ * config/s390/s390-modes.def: Add supported vector modes.
+ * config/s390/s390-protos.h (s390_cannot_change_mode_class)
+ (s390_function_arg_vector, s390_contiguous_bitmask_vector_p)
+ (s390_bytemask_vector_p, s390_expand_vec_strlen)
+ (s390_expand_vec_compare, s390_expand_vcond)
+ (s390_expand_vec_init): Add prototypes.
+ * config/s390/s390.c (VEC_ARG_NUM_REG): New macro.
+ (s390_vector_mode_supported_p): New function.
+ (s390_contiguous_bitmask_p): Mask out the irrelevant bits.
+ (s390_contiguous_bitmask_vector_p): New function.
+ (s390_bytemask_vector_p): New function.
+ (s390_split_ok_p): Vector regs don't work either.
+ (regclass_map): Add VEC_REGS.
+ (s390_legitimate_constant_p): Handle vector constants.
+ (s390_cannot_force_const_mem): Handle CONST_VECTOR.
+ (legitimate_reload_vector_constant_p): New function.
+ (s390_preferred_reload_class): Handle CONST_VECTOR.
+ (s390_reload_symref_address): Likewise.
+ (s390_secondary_reload): Vector memory instructions only support
+ short displacements. Rename reload*_nonoffmem* to reload*_la*.
+ (s390_emit_ccraw_jump): New function.
+ (s390_expand_vec_strlen): New function.
+ (s390_expand_vec_compare): New function.
+ (s390_expand_vcond): New function.
+ (s390_expand_vec_init): New function.
+ (s390_dwarf_frame_reg_mode): New function.
+ (print_operand): Handle addresses with 'O' and 'R' output modifiers.
+ (NR_C_MODES, constant_modes): Add vector modes.
+ (s390_output_pool_entry): Handle vector constants.
+ (s390_hard_regno_mode_ok): Handle vector registers.
+ (s390_class_max_nregs): Likewise.
+ (s390_cannot_change_mode_class): New function.
+ (s390_invalid_arg_for_unprototyped_fn): New function.
+ (s390_function_arg_vector): New function.
+ (s390_function_arg_float): Remove size variable.
+ (s390_pass_by_reference): Handle vector arguments.
+ (s390_function_arg_advance): Likewise.
+ (s390_function_arg): Likewise.
+ (s390_return_in_memory): Vector values are returned in a VR if
+ possible.
+ (s390_function_and_libcall_value): Handle vector arguments.
+ (s390_gimplify_va_arg): Likewise.
+ (s390_call_saved_register_used): Treat the arguments as named.
+ (s390_conditional_register_usage): Disable v16-v31 for non-vec
+ targets.
+ (s390_preferred_simd_mode): New function.
+ (s390_support_vector_misalignment): New function.
+ (s390_vector_alignment): New function.
+ (TARGET_STRICT_ARGUMENT_NAMING, TARGET_DWARF_FRAME_REG_MODE)
+ (TARGET_VECTOR_MODE_SUPPORTED_P)
+ (TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN)
+ (TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
+ (TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT)
+ (TARGET_VECTOR_ALIGNMENT): Define target macros.
+ * config/s390/s390.h (FUNCTION_ARG_PADDING): Define macro.
+ (FIRST_PSEUDO_REGISTER): Increase value.
+ (VECTOR_NOFP_REGNO_P, VECTOR_REGNO_P, VECTOR_NOFP_REG_P)
+ (VECTOR_REG_P): Define macros.
+ (FIXED_REGISTERS, CALL_USED_REGISTERS)
+ (CALL_REALLY_USED_REGISTERS, REG_ALLOC_ORDER)
+ (HARD_REGNO_CALL_PART_CLOBBERED, REG_CLASS_NAMES)
+ (FUNCTION_ARG_REGNO_P, FUNCTION_VALUE_REGNO_P, REGISTER_NAMES):
+ Add vector registers.
+ (CANNOT_CHANGE_MODE_CLASS): Call C function.
+ (enum reg_class): Add VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS.
+ (SECONDARY_MEMORY_NEEDED): Allow SF<->SI mode moves without
+ memory.
+ (DBX_REGISTER_NUMBER, FIRST_VEC_ARG_REGNO, LAST_VEC_ARG_REGNO)
+ (SHORT_DISP_IN_RANGE, VECTOR_STORE_FLAG_VALUE): Define macros.
+ * config/s390/s390.md (UNSPEC_VEC_*): New constants.
+ (VR*_REGNUM): New constants.
+ (ALL): New mode iterator.
+ (INTALL): Remove mode iterator.
+ Include vector.md.
+ (movti): Implement TImode moves for VRs.
+ Disable TImode splitter for VR targets.
+ Implement splitting TImode GPR<->VR moves.
+ (reload*_tomem_z10, reload*_toreg_z10): Replace INTALL with ALL.
+ (reload<mode>_nonoffmem_in, reload<mode>_nonoffmem_out): Rename to
+ reload<mode>_la_in, reload<mode>_la_out.
+ (*movdi_64, *movsi_zarch, *movhi, *movqi, *mov<mode>_64dfp)
+ (*mov<mode>_64, *mov<mode>_31): Add vector instructions.
+ (TD/TF mode splitter): Enable for GPRs only (formerly !FP).
+ (mov<mode> SF SD): Prefer lder, lde for loading.
+ Add lrl and strl instructions.
+ Add vector instructions.
+ (strlen<mode>): Rename old strlen<mode> to strlen_srst<mode>.
+ Call s390_expand_vec_strlen on z13.
+ (*cc_to_int): Change predicate to nonimmediate_operand.
+ (addti3): Rename to *addti3. New expander.
+ (subti3): Rename to *subti3. New expander.
+ * config/s390/vector.md: New file.
+
2015-05-19 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* common/config/s390/s390-common.c (processor_flags_table): Add
;; c -- Condition code register 33.
;; d -- Any register from 0 to 15.
;; f -- Floating point registers.
+;; j -- Multiple letter constraint for constant scalar and vector values
+;; j00: constant zero scalar or vector
+;; jm1: constant scalar or vector with all bits set
+;; jxx: contiguous bitmask of 0 or 1 in all vector elements
+;; jyy: constant consisting of byte chunks being either 0 or 0xff
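+;; (For instance, in V4SImode the value 0x000ff000 in every element
+;; satisfies jxx, since bits 12-19 form one contiguous run, while
+;; 0x00ff00ff in every element satisfies jyy, every byte being
+;; either 0x00 or 0xff.)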
;; t -- Access registers 36 and 37.
+;; v -- Vector registers v0-v31.
;; C -- A signed 8-bit constant (-128..127)
;; D -- An unsigned 16-bit constant (0..65535)
;; G -- Const double zero operand
"FP_REGS"
"Floating point registers")
+(define_constraint "j00"
+ "Zero scalar or vector constant"
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))
+
+(define_constraint "jm1"
+ "All one bit scalar or vector constant"
+ (match_test "op == CONSTM1_RTX (GET_MODE (op))"))
+
+(define_constraint "jxx"
+ "@internal"
+ (and (match_code "const_vector")
+ (match_test "s390_contiguous_bitmask_vector_p (op, NULL, NULL)")))
+
+(define_constraint "jyy"
+ "@internal"
+ (and (match_code "const_vector")
+ (match_test "s390_bytemask_vector_p (op, NULL)")))
(define_register_constraint "t"
"ACCESS_REGS"
Access registers 36 and 37")
+(define_register_constraint "v"
+ "VEC_REGS"
+ "Vector registers v0-v31")
+
+
;;
;; General constraints for constants.
;;
;; operands --------------------------------------------------------------
-;; Return true if OP a (const_int 0) operand.
-
+;; Return true if OP is a const 0 operand (int/float/vector).
(define_predicate "const0_operand"
- (and (match_code "const_int, const_double")
+ (and (match_code "const_int,const_double,const_vector")
(match_test "op == CONST0_RTX (mode)")))
+;; Return true if OP is an all ones operand (int/float/vector).
+(define_predicate "constm1_operand"
+ (and (match_code "const_int, const_double,const_vector")
+ (match_test "op == CONSTM1_RTX (mode)")))
+
;; Return true if OP is constant.
(define_special_predicate "consttable_operand"
- (and (match_code "symbol_ref, label_ref, const, const_int, const_double")
+ (and (match_code "symbol_ref, label_ref, const, const_int, const_double, const_vector")
(match_test "CONSTANT_P (op)")))
;; Return true if OP is a valid S-type operand.
CC_MODE (CCT2);
CC_MODE (CCT3);
CC_MODE (CCRAW);
+
+/* Vector modes. */
+
+VECTOR_MODES (INT, 2); /* V2QI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+
+VECTOR_MODE (FLOAT, SF, 2); /* V2SF */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (FLOAT, DF, 2); /* V2DF */
+
+VECTOR_MODE (INT, QI, 1); /* V1QI */
+VECTOR_MODE (INT, HI, 1); /* V1HI */
+VECTOR_MODE (INT, SI, 1); /* V1SI */
+VECTOR_MODE (INT, DI, 1); /* V1DI */
+VECTOR_MODE (INT, TI, 1); /* V1TI */
+
+VECTOR_MODE (FLOAT, SF, 1); /* V1SF */
+VECTOR_MODE (FLOAT, DF, 1); /* V1DF */
+VECTOR_MODE (FLOAT, TF, 1); /* V1TF */
extern bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
extern int s390_class_max_nregs (enum reg_class, machine_mode);
+extern int s390_cannot_change_mode_class (machine_mode, machine_mode,
+ enum reg_class);
+extern bool s390_function_arg_vector (machine_mode, const_tree);
#ifdef RTX_CODE
extern int s390_extra_constraint_str (rtx, int, const char *);
extern int s390_single_part (rtx, machine_mode, machine_mode, int);
extern unsigned HOST_WIDE_INT s390_extract_part (rtx, machine_mode, int);
extern bool s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT, int, int *, int *);
+extern bool s390_contiguous_bitmask_vector_p (rtx, int *, int *);
+extern bool s390_bytemask_vector_p (rtx, unsigned *);
extern bool s390_split_ok_p (rtx, rtx, machine_mode, int);
extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT);
extern bool s390_offset_p (rtx, rtx, rtx);
extern bool s390_expand_movmem (rtx, rtx, rtx);
extern void s390_expand_setmem (rtx, rtx, rtx);
extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
+extern void s390_expand_vec_strlen (rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs_hqi (machine_mode, rtx, rtx, rtx,
extern void s390_expand_atomic (machine_mode, enum rtx_code,
rtx, rtx, rtx, bool);
extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
+extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx);
+extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
+extern void s390_expand_vec_init (rtx, rtx);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "intl.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
+#include "cgraph.h"
/* Define the specific costs for a given cpu. */
/* Number of GPRs and FPRs used for argument passing. */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
+#define VEC_ARG_NUM_REG 8
/* A couple of shortcuts. */
#define CONST_OK_FOR_J(x) \
return default_scalar_mode_supported_p (mode);
}
+/* Return true if the back end supports vector mode MODE. */
+static bool
+s390_vector_mode_supported_p (machine_mode mode)
+{
+ machine_mode inner;
+
+ if (!VECTOR_MODE_P (mode)
+ || !TARGET_VX
+ || GET_MODE_SIZE (mode) > 16)
+ return false;
+
+ inner = GET_MODE_INNER (mode);
+
+ switch (inner)
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case TImode:
+ case SFmode:
+ case DFmode:
+ case TFmode:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
void
/* Calculate a mask for all bits beyond the contiguous bits. */
mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
+ if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
+ mask &= (HOST_WIDE_INT_1U << size) - 1;
+
if (mask & in)
return false;
return true;
}
+/* Return true if OP contains the same contiguous bitfield in *all*
+ its elements. START and END can be used to obtain the start and
+ end position of the bitfield.
+
+ START/END give the position of the first/last bit of the bitfield
+ counting from the lowest order bit starting with zero. In order to
+ use these values for S/390 instructions this has to be converted to
+ "bits big endian" style. */
+
+bool
+s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
+{
+ unsigned HOST_WIDE_INT mask;
+ int length, size;
+
+ if (!VECTOR_MODE_P (GET_MODE (op))
+ || GET_CODE (op) != CONST_VECTOR
+ || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ return false;
+
+ if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
+ {
+ int i;
+
+ for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
+ if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
+ return false;
+ }
+
+ size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
+ mask = UINTVAL (XVECEXP (op, 0, 0));
+ if (s390_contiguous_bitmask_p (mask, size, start,
+ end != NULL ? &length : NULL))
+ {
+ if (end != NULL)
+ *end = *start + length - 1;
+ return true;
+ }
+ /* 0xff00000f style immediates can be covered by swapping start and
+ end indices in vgm. */
+ if (s390_contiguous_bitmask_p (~mask, size, start,
+ end != NULL ? &length : NULL))
+ {
+ if (end != NULL)
+ *end = *start - 1;
+ if (start != NULL)
+ *start = *start + length;
+ return true;
+ }
+ return false;
+}
+
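(Aside: a minimal host-side sketch of the predicate above, in standalone C.
It assumes a 64-bit unsigned long long stands in for HOST_WIDE_INT and
ignores the complemented-mask case that the function covers by swapping the
vgm start/end indices.)

    #include <stdbool.h>
    #include <stdio.h>

    /* Sketch only: return true if IN is one contiguous run of ones
       within the low SIZE bits; report the run via *START and *END,
       counting from the lowest order bit as in the function above.  */
    static bool
    contiguous_bitmask_p (unsigned long long in, int size,
                          int *start, int *end)
    {
      int pos = 0, len = 0;

      if (in == 0)
        return false;
      while (!(in & 1))        /* skip trailing zeros */
        {
          in >>= 1;
          pos++;
        }
      while (in & 1)           /* count the contiguous ones */
        {
          in >>= 1;
          len++;
        }
      if (in != 0 || pos + len > size)
        return false;          /* a second run, or out of range */
      *start = pos;
      *end = pos + len - 1;
      return true;
    }

    int
    main (void)
    {
      int start, end;

      if (contiguous_bitmask_p (0x000ff000ULL, 32, &start, &end))
        printf ("start=%d end=%d\n", start, end);  /* start=12 end=19 */
      return 0;
    }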
+/* Return true if OP consists only of byte chunks being either 0 or
+ 0xff. If MASK is != NULL a byte mask is generated which is
+ appropriate for the vector generate byte mask instruction. */
+
+bool
+s390_bytemask_vector_p (rtx op, unsigned *mask)
+{
+ int i;
+ unsigned tmp_mask = 0;
+ int nunit, unit_size;
+
+ if (!VECTOR_MODE_P (GET_MODE (op))
+ || GET_CODE (op) != CONST_VECTOR
+ || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ return false;
+
+ nunit = GET_MODE_NUNITS (GET_MODE (op));
+ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
+
+ for (i = 0; i < nunit; i++)
+ {
+ unsigned HOST_WIDE_INT c;
+ int j;
+
+ if (!CONST_INT_P (XVECEXP (op, 0, i)))
+ return false;
+
+ c = UINTVAL (XVECEXP (op, 0, i));
+ for (j = 0; j < unit_size; j++)
+ {
+ if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
+ return false;
+ tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
+ c = c >> BITS_PER_UNIT;
+ }
+ }
+
+ if (mask != NULL)
+ *mask = tmp_mask;
+
+ return true;
+}
+
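(A standalone sketch of the mask derivation above, using an invented V4SI
constant: mask bit (nunit - 1 - i) * unit_size + j corresponds to byte j of
element i, so element 0 ends up in the most significant mask bits, which is
what vgbm expects.)

    #include <stdio.h>

    int
    main (void)
    {
      /* Sketch only: a hypothetical V4SI constant
         { 0x00ff00ff, 0, 0xffffffff, 0 }.  */
      unsigned int elts[4] = { 0x00ff00ff, 0, 0xffffffff, 0 };
      int nunit = 4, unit_size = 4;
      unsigned mask = 0;
      int i, j;

      for (i = 0; i < nunit; i++)
        {
          unsigned int c = elts[i];

          for (j = 0; j < unit_size; j++)
            {
              if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
                return 1;                    /* not a byte mask */
              mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
              c >>= 8;
            }
        }
      printf ("vgbm mask: 0x%04x\n", mask);  /* prints 0x50f0 */
      return 0;
    }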
/* Check whether a rotate of ROTL followed by an AND of CONTIG is
equivalent to a shift followed by the AND. In particular, CONTIG
should not overlap the (rotated) bit 0/bit 63 gap. Negative values
bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
- /* Floating point registers cannot be split. */
- if (FP_REG_P (src) || FP_REG_P (dst))
+ /* Floating point and vector registers cannot be split. */
+ if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
return false;
/* We don't need to split if operands are directly accessible. */
/* Map for smallest class containing reg regno. */
const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
-{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- FP_REGS, FP_REGS, FP_REGS, FP_REGS,
- ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
- ACCESS_REGS, ACCESS_REGS
+{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
+ ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
+ ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
+ ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
+ VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
+ VEC_REGS, VEC_REGS /* 52 */
};
/* Return attribute type of insn. */
static bool
s390_legitimate_constant_p (machine_mode mode, rtx op)
{
+ if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+ {
+ if (GET_MODE_SIZE (mode) != 16)
+ return 0;
+
+ if (!const0_operand (op, mode)
+ && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+ && !s390_bytemask_vector_p (op, NULL))
+ return 0;
+ }
+
/* Accept all non-symbolic constants. */
if (!SYMBOLIC_CONST (op))
return 1;
{
case CONST_INT:
case CONST_DOUBLE:
+ case CONST_VECTOR:
/* Accept all non-symbolic constants. */
return false;
return false;
}
+/* Return true if the constant value OP is a legitimate vector operand
+ during and after reload.
+ This function accepts all constants which can be loaded directly
+ into a VR. */
+
+static bool
+legitimate_reload_vector_constant_p (rtx op)
+{
+ /* FIXME: Support constant vectors with all the same 16 bit unsigned
+ operands. These can be loaded with vrepi. */
+
+ if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
+ && (const0_operand (op, GET_MODE (op))
+ || constm1_operand (op, GET_MODE (op))
+ || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
+ || s390_bytemask_vector_p (op, NULL)))
+ return true;
+
+ return false;
+}
+
/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
return the class of reg to actually use. */
{
/* Constants we cannot reload into general registers
must be forced into the literal pool. */
+ case CONST_VECTOR:
case CONST_DOUBLE:
case CONST_INT:
if (reg_class_subset_p (GENERAL_REGS, rclass)
else if (reg_class_subset_p (FP_REGS, rclass)
&& legitimate_reload_fp_constant_p (op))
return FP_REGS;
+ else if (reg_class_subset_p (VEC_REGS, rclass)
+ && legitimate_reload_vector_constant_p (op))
+ return VEC_REGS;
+
return NO_REGS;
/* If a symbolic constant or a PLUS is reloaded,
/* Reload might have pulled a constant out of the literal pool.
Force it back in. */
if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
+ || GET_CODE (mem) == CONST_VECTOR
|| GET_CODE (mem) == CONST)
mem = force_const_mem (GET_MODE (reg), mem);
if (reg_classes_intersect_p (CC_REGS, rclass))
return GENERAL_REGS;
+ if (TARGET_VX)
+ {
+ /* The vst/vl vector move instructions allow only short
+ displacements. */
+ if (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
+ && reg_class_subset_p (rclass, VEC_REGS)
+ && (!reg_class_subset_p (rclass, FP_REGS)
+ || (GET_MODE_SIZE (mode) > 8
+ && s390_class_max_nregs (FP_REGS, mode) == 1)))
+ {
+ if (in_p)
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_la_in :
+ CODE_FOR_reloadsi_la_in);
+ else
+ sri->icode = (TARGET_64BIT ?
+ CODE_FOR_reloaddi_la_out :
+ CODE_FOR_reloadsi_la_out);
+ }
+ }
+
if (TARGET_Z10)
{
HOST_WIDE_INT offset;
__SECONDARY_RELOAD_CASE (SD, sd);
__SECONDARY_RELOAD_CASE (DD, dd);
__SECONDARY_RELOAD_CASE (TD, td);
-
+ __SECONDARY_RELOAD_CASE (V1QI, v1qi);
+ __SECONDARY_RELOAD_CASE (V2QI, v2qi);
+ __SECONDARY_RELOAD_CASE (V4QI, v4qi);
+ __SECONDARY_RELOAD_CASE (V8QI, v8qi);
+ __SECONDARY_RELOAD_CASE (V16QI, v16qi);
+ __SECONDARY_RELOAD_CASE (V1HI, v1hi);
+ __SECONDARY_RELOAD_CASE (V2HI, v2hi);
+ __SECONDARY_RELOAD_CASE (V4HI, v4hi);
+ __SECONDARY_RELOAD_CASE (V8HI, v8hi);
+ __SECONDARY_RELOAD_CASE (V1SI, v1si);
+ __SECONDARY_RELOAD_CASE (V2SI, v2si);
+ __SECONDARY_RELOAD_CASE (V4SI, v4si);
+ __SECONDARY_RELOAD_CASE (V1DI, v1di);
+ __SECONDARY_RELOAD_CASE (V2DI, v2di);
+ __SECONDARY_RELOAD_CASE (V1TI, v1ti);
+ __SECONDARY_RELOAD_CASE (V1SF, v1sf);
+ __SECONDARY_RELOAD_CASE (V2SF, v2sf);
+ __SECONDARY_RELOAD_CASE (V4SF, v4sf);
+ __SECONDARY_RELOAD_CASE (V1DF, v1df);
+ __SECONDARY_RELOAD_CASE (V2DF, v2df);
+ __SECONDARY_RELOAD_CASE (V1TF, v1tf);
default:
gcc_unreachable ();
}
{
if (in_p)
sri->icode = (TARGET_64BIT ?
- CODE_FOR_reloaddi_nonoffmem_in :
- CODE_FOR_reloadsi_nonoffmem_in);
+ CODE_FOR_reloaddi_la_in :
+ CODE_FOR_reloadsi_la_in);
else
sri->icode = (TARGET_64BIT ?
- CODE_FOR_reloaddi_nonoffmem_out :
- CODE_FOR_reloadsi_nonoffmem_out);
+ CODE_FOR_reloaddi_la_out :
+ CODE_FOR_reloadsi_la_out);
}
}
return true;
}
+/* Emit a conditional jump to LABEL for condition code mask MASK using
+ comparison operator COMPARISON. Return the emitted jump insn. */
+
+static rtx
+s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
+{
+ rtx temp;
+
+ gcc_assert (comparison == EQ || comparison == NE);
+ gcc_assert (mask > 0 && mask < 15);
+
+ temp = gen_rtx_fmt_ee (comparison, VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+ temp = gen_rtx_SET (pc_rtx, temp);
+ return emit_jump_insn (temp);
+}
+
+/* Emit the instructions to implement strlen of STRING and store the
+ result in TARGET. The string has the known ALIGNMENT. This
+ version uses vector instructions and is therefore not appropriate
+ for targets prior to z13. */
+
+void
+s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
+{
+ int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
+ int very_likely = REG_BR_PROB_BASE - 1;
+ rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
+ rtx str_reg = gen_reg_rtx (V16QImode);
+ rtx str_addr_base_reg = gen_reg_rtx (Pmode);
+ rtx str_idx_reg = gen_reg_rtx (Pmode);
+ rtx result_reg = gen_reg_rtx (V16QImode);
+ rtx is_aligned_label = gen_label_rtx ();
+ rtx into_loop_label = NULL_RTX;
+ rtx loop_start_label = gen_label_rtx ();
+ rtx temp;
+ rtx len = gen_reg_rtx (QImode);
+ rtx cond;
+
+ s390_load_address (str_addr_base_reg, XEXP (string, 0));
+ emit_move_insn (str_idx_reg, const0_rtx);
+
+ if (INTVAL (alignment) < 16)
+ {
+ /* Check whether the address happens to be aligned properly and,
+ if so, jump directly to the aligned loop. */
+ emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
+ str_addr_base_reg, GEN_INT (15)),
+ const0_rtx, EQ, NULL_RTX,
+ Pmode, 1, is_aligned_label);
+
+ temp = gen_reg_rtx (Pmode);
+ temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
+ GEN_INT (15), temp, 1, OPTAB_DIRECT);
+ gcc_assert (REG_P (temp));
+ highest_index_to_load_reg =
+ expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
+ highest_index_to_load_reg, 1, OPTAB_DIRECT);
+ gcc_assert (REG_P (highest_index_to_load_reg));
+ emit_insn (gen_vllv16qi (str_reg,
+ convert_to_mode (SImode, highest_index_to_load_reg, 1),
+ gen_rtx_MEM (BLKmode, str_addr_base_reg)));
+
+ into_loop_label = gen_label_rtx ();
+ s390_emit_jump (into_loop_label, NULL_RTX);
+ emit_barrier ();
+ }
+
+ emit_label (is_aligned_label);
+ LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
+
+ /* From this point on we perform only 16-byte aligned
+ loads. */
+ emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
+
+ emit_label (loop_start_label);
+ LABEL_NUSES (loop_start_label) = 1;
+
+ /* Load 16 bytes of the string into VR. */
+ emit_move_insn (str_reg,
+ gen_rtx_MEM (V16QImode,
+ gen_rtx_PLUS (Pmode, str_idx_reg,
+ str_addr_base_reg)));
+ if (into_loop_label != NULL_RTX)
+ {
+ emit_label (into_loop_label);
+ LABEL_NUSES (into_loop_label) = 1;
+ }
+
+ /* Increment string index by 16 bytes. */
+ expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
+ str_idx_reg, 1, OPTAB_DIRECT);
+
+ emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
+ GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
+
+ add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
+ REG_BR_PROB, very_likely);
+ emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
+
+ /* If the string pointer wasn't aligned we have loaded less than 16
+ bytes and the remaining bytes got filled with zeros (by vll).
+ Now we have to check whether the resulting index lies within the
+ bytes that are actually part of the string. */
+
+ cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
+ highest_index_to_load_reg);
+ s390_load_address (highest_index_to_load_reg,
+ gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
+ const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_movdicc (str_idx_reg, cond,
+ highest_index_to_load_reg, str_idx_reg));
+ else
+ emit_insn (gen_movsicc (str_idx_reg, cond,
+ highest_index_to_load_reg, str_idx_reg));
+
+ add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
+ very_unlikely);
+
+ expand_binop (Pmode, add_optab, str_idx_reg,
+ GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
+ /* FIXME: len is already zero extended - so avoid the llgcr emitted
+ here. */
+ temp = expand_binop (Pmode, add_optab, str_idx_reg,
+ convert_to_mode (Pmode, len, 1),
+ target, 1, OPTAB_DIRECT);
+ if (temp != target)
+ emit_move_insn (target, temp);
+}
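
(For reference, a plain C model of the control flow the expander emits.
This sketches the algorithm only, not the RTL; find_zero_16 stands in for
the vl/vll load plus the vfenezb search.)

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Sketch of one 16-byte block search: bytes with index > LIMIT
       read as zero, mimicking vll's zero fill; returns the index of
       the first zero byte, or 16 if none, like vfenezb.  */
    static long
    find_zero_16 (const char *base, long limit)
    {
      long i;

      for (i = 0; i < 16; i++)
        if (i > limit || base[i] == 0)
          return i;
      return 16;
    }

    static size_t
    vec_strlen_model (const char *s)
    {
      size_t idx = 0;
      long highest = 15;       /* highest byte index actually loaded */

      if (((uintptr_t) s & 15) != 0)
        highest = 15 - ((uintptr_t) s & 15);

      for (;;)
        {
          long len = find_zero_16 (s + idx, highest);

          idx += 16;
          if (len == 16)                  /* no zero: next aligned block */
            continue;
          if (len > highest)              /* zero was only vll's fill */
            {
              idx = (size_t) highest + 1; /* restart at the boundary */
              highest = 15;
              continue;
            }
          return idx - 16 + len;
        }
    }

    int
    main (void)
    {
      static char buf[64] __attribute__ ((aligned (16)));

      strcpy (buf + 3, "hello, vector world");
      printf ("%zu == %zu\n", vec_strlen_model (buf + 3),
              strlen (buf + 3));          /* 19 == 19 */
      return 0;
    }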
/* Expand conditional increment or decrement using alc/slb instructions.
Should generate code setting DST to either SRC or SRC + INCREMENT,
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
+ the result in TARGET. */
+
+void
+s390_expand_vec_compare (rtx target, enum rtx_code cond,
+ rtx cmp_op1, rtx cmp_op2)
+{
+ machine_mode mode = GET_MODE (target);
+ bool neg_p = false, swap_p = false;
+ rtx tmp;
+
+ if (GET_MODE (cmp_op1) == V2DFmode)
+ {
+ switch (cond)
+ {
+ /* NE a != b -> !(a == b) */
+ case NE: cond = EQ; neg_p = true; break;
+ /* UNGT a u> b -> !(b >= a) */
+ case UNGT: cond = GE; neg_p = true; swap_p = true; break;
+ /* UNGE a u>= b -> !(b > a) */
+ case UNGE: cond = GT; neg_p = true; swap_p = true; break;
+ /* LE: a <= b -> b >= a */
+ case LE: cond = GE; swap_p = true; break;
+ /* UNLE: a u<= b -> !(a > b) */
+ case UNLE: cond = GT; neg_p = true; break;
+ /* LT: a < b -> b > a */
+ case LT: cond = GT; swap_p = true; break;
+ /* UNLT: a u< b -> !(a >= b) */
+ case UNLT: cond = GE; neg_p = true; break;
+ case UNEQ:
+ emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
+ return;
+ case LTGT:
+ emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
+ return;
+ case ORDERED:
+ emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
+ return;
+ case UNORDERED:
+ emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
+ return;
+ default: break;
+ }
+ }
+ else
+ {
+ switch (cond)
+ {
+ /* NE: a != b -> !(a == b) */
+ case NE: cond = EQ; neg_p = true; break;
+ /* GE: a >= b -> !(b > a) */
+ case GE: cond = GT; neg_p = true; swap_p = true; break;
+ /* GEU: a >= b -> !(b > a) */
+ case GEU: cond = GTU; neg_p = true; swap_p = true; break;
+ /* LE: a <= b -> !(a > b) */
+ case LE: cond = GT; neg_p = true; break;
+ /* LEU: a <= b -> !(a > b) */
+ case LEU: cond = GTU; neg_p = true; break;
+ /* LT: a < b -> b > a */
+ case LT: cond = GT; swap_p = true; break;
+ /* LTU: a < b -> b > a */
+ case LTU: cond = GTU; swap_p = true; break;
+ default: break;
+ }
+ }
+
+ if (swap_p)
+ {
+ tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
+ }
+
+ emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
+ mode,
+ cmp_op1, cmp_op2)));
+ if (neg_p)
+ emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
+}
+
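(A scalar sketch of the integer canonicalisation above: everything is
reduced to EQ/GT/GTU, matching the element-wise compares the hardware
provides (vceq, vch, vchl), plus an optional operand swap and result
negation; lanes yield -1 for true and 0 for false.)

    #include <assert.h>

    /* Sketch only: one lane of the vector compare instructions.  */
    static int lane_gt (int a, int b) { return a > b ? -1 : 0; }
    static int lane_eq (int a, int b) { return a == b ? -1 : 0; }

    int
    main (void)
    {
      int a = 3, b = 7;

      assert ((a != b ? -1 : 0) == ~lane_eq (a, b)); /* NE: neg */
      assert ((a >= b ? -1 : 0) == ~lane_gt (b, a)); /* GE: swap + neg */
      assert ((a <= b ? -1 : 0) == ~lane_gt (a, b)); /* LE: neg */
      assert ((a <  b ? -1 : 0) ==  lane_gt (b, a)); /* LT: swap */
      return 0;
    }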
+/* Generate a vector comparison expression loading either elements of
+ THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
+ and CMP_OP2. */
+
+void
+s390_expand_vcond (rtx target, rtx then, rtx els,
+ enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
+{
+ rtx tmp;
+ machine_mode result_mode;
+ rtx result_target;
+
+ /* We always use an integral vector type to hold the comparison
+ result. */
+ result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
+ result_target = gen_reg_rtx (result_mode);
+
+ /* Alternatively this could be done by reload by lowering the cmp*
+ predicates. But it appears to be better for scheduling etc. to
+ have this done early. */
+ if (!REG_P (cmp_op1))
+ cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
+
+ if (!REG_P (cmp_op2))
+ cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
+
+ s390_expand_vec_compare (result_target, cond,
+ cmp_op1, cmp_op2);
+
+ /* If the results are supposed to be either -1 or 0 we are done
+ since this is what our compare instructions generate anyway. */
+ if (constm1_operand (then, GET_MODE (then))
+ && const0_operand (els, GET_MODE (els)))
+ {
+ emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
+ result_target, 0));
+ return;
+ }
+
+ /* Otherwise we will do a vsel afterwards. */
+ /* This gets triggered e.g.
+ with gcc.c-torture/compile/pr53410-1.c */
+ if (!REG_P (then))
+ then = force_reg (GET_MODE (target), then);
+
+ if (!REG_P (els))
+ els = force_reg (GET_MODE (target), els);
+
+ tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
+ result_target,
+ CONST0_RTX (result_mode));
+
+ /* We compared the result against zero above so we have to swap then
+ and els here. */
+ tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
+
+ gcc_assert (GET_MODE (target) == GET_MODE (then));
+ emit_insn (gen_rtx_SET (target, tmp));
+}
+
+/* Emit the RTX necessary to initialize the vector TARGET with values
+ in VALS. */
+void
+s390_expand_vec_init (rtx target, rtx vals)
+{
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true, all_regs = true, all_const_int = true;
+ rtx x;
+ int i;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+
+ if (!CONST_INT_P (x))
+ all_const_int = false;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+
+ if (!REG_P (x))
+ all_regs = false;
+ }
+
+ /* Use vector gen mask or vector gen byte mask if possible. */
+ if (all_same && all_const_int
+ && (XVECEXP (vals, 0, 0) == const0_rtx
+ || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
+ NULL, NULL)
+ || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
+ {
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
+ return;
+ }
+
+ if (all_same)
+ {
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_DUPLICATE (mode,
+ XVECEXP (vals, 0, 0))));
+ return;
+ }
+
+ if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
+ {
+ /* Use vector load pair. */
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_VEC_CONCAT (mode,
+ XVECEXP (vals, 0, 0),
+ XVECEXP (vals, 0, 1))));
+ return;
+ }
+
+ /* We are about to set the vector elements one by one. Zero out the
+ full register first in order to help the data flow framework to
+ detect it as a full VR set. */
+ emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
+
+ /* Unfortunately the vec_init expander is not allowed to fail. So
+ we have to implement the fallback ourselves. */
+ for (i = 0; i < n_elts; i++)
+ emit_insn (gen_rtx_SET (target,
+ gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, XVECEXP (vals, 0, i),
+ GEN_INT (i), target),
+ UNSPEC_VEC_SET)));
+}
+
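(Summarising the four strategies above with source-level examples; apart
from "vector load pair" (vlvgp), which is named in the comment above, the
instruction names are educated guesses.)

    typedef long long v2di __attribute__ ((vector_size (16)));

    v2di
    f_mask (void)              /* all-same constant mask: vgbm/vone */
    {
      return (v2di){ -1, -1 };
    }

    v2di
    f_splat (long long x)      /* vec_duplicate: vlrep/vrep */
    {
      return (v2di){ x, x };
    }

    v2di
    f_pair (long long x, long long y)    /* vector load pair: vlvgp */
    {
      return (v2di){ x, y };
    }

    v2di
    f_mixed (long long *p, long long y)  /* fallback: vzero + vlvg */
    {
      return (v2di){ p[1], y };
    }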
/* Structure to hold the initial parameters for a compare_and_swap operation
in HImode and QImode. */
fputs ("@DTPOFF", file);
}
+/* Return the proper mode for REGNO being represented in the dwarf
+ unwind table. */
+machine_mode
+s390_dwarf_frame_reg_mode (int regno)
+{
+ machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
+
+ /* The rightmost 64 bits of vector registers are call-clobbered. */
+ if (GET_MODE_SIZE (save_mode) > 8)
+ save_mode = DImode;
+
+ return save_mode;
+}
+
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE. */
'J': print tls_load/tls_gdcall/tls_ldcall suffix
'M': print the second word of a TImode operand.
'N': print the second word of a DImode operand.
- 'O': print only the displacement of a memory reference.
- 'R': print only the base register of a memory reference.
+ 'O': print only the displacement of a memory reference or address.
+ 'R': print only the base register of a memory reference or address.
'S': print S-type memory reference (base+displacement).
'Y': print shift count operand.
'b': print integer X as if it's an unsigned byte.
'c': print integer X as if it's an signed byte.
- 'e': "end" of DImode contiguous bitmask X.
- 'f': "end" of SImode contiguous bitmask X.
+ 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
+ 'f': "end" contiguous bitmask X in SImode.
'h': print integer X as if it's a signed halfword.
'i': print the first nonzero HImode part of X.
'j': print the first HImode part unequal to -1 of X.
'k': print the first nonzero SImode part of X.
'm': print the first SImode part unequal to -1 of X.
'o': print integer X as if it's an unsigned 32bit word.
- 's': "start" of DImode contiguous bitmask X.
- 't': "start" of SImode contiguous bitmask X.
+ 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
+ 't': CONST_INT: "start" of contiguous bitmask X in SImode.
+ CONST_VECTOR: Generate a bitmask for vgbm instruction.
'x': print integer X as if it's an unsigned halfword.
+ 'v': print register number as vector register (v1 instead of f1).
*/
void
struct s390_address ad;
int ret;
- if (!MEM_P (x))
- {
- output_operand_lossage ("memory reference expected for "
- "'O' output modifier");
- return;
- }
-
- ret = s390_decompose_address (XEXP (x, 0), &ad);
+ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
|| (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
struct s390_address ad;
int ret;
- if (!MEM_P (x))
- {
- output_operand_lossage ("memory reference expected for "
- "'R' output modifier");
- return;
- }
-
- ret = s390_decompose_address (XEXP (x, 0), &ad);
+ ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
if (!ret
|| (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
switch (GET_CODE (x))
{
case REG:
- fprintf (file, "%s", reg_names[REGNO (x)]);
+ /* Print FP regs as fx instead of vx when they are accessed
+ through a non-vector mode. */
+ if (code == 'v'
+ || VECTOR_NOFP_REG_P (x)
+ || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
+ || (VECTOR_REG_P (x)
+ && (GET_MODE_SIZE (GET_MODE (x)) /
+ s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
+ fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
+ else
+ fprintf (file, "%s", reg_names[REGNO (x)]);
break;
case MEM:
code);
}
break;
+ case CONST_VECTOR:
+ switch (code)
+ {
+ case 'e':
+ case 's':
+ {
+ int start, stop, inner_len;
+ bool ok;
+
+ inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
+ ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
+ gcc_assert (ok);
+ if (code == 's' || code == 't')
+ ival = inner_len - stop - 1;
+ else
+ ival = inner_len - start - 1;
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+ }
+ break;
+ case 't':
+ {
+ unsigned mask;
+ bool ok = s390_bytemask_vector_p (x, &mask);
+ gcc_assert (ok);
+ fprintf (file, "%u", mask);
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid constant vector for output "
+ "modifier '%c'", code);
+ }
+ break;
default:
if (code == 0)
/* We keep a list of constants which we have to add to internal
constant tables in the middle of large functions. */
-#define NR_C_MODES 11
+#define NR_C_MODES 31
machine_mode constant_modes[NR_C_MODES] =
{
TFmode, TImode, TDmode,
+ V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
DFmode, DImode, DDmode,
+ V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
SFmode, SImode, SDmode,
+ V4QImode, V2HImode, V1SImode, V1SFmode,
HImode,
- QImode
+ V2QImode, V1HImode,
+ QImode,
+ V1QImode
};
struct constant
mark_symbol_refs_as_used (exp);
break;
+ case MODE_VECTOR_INT:
+ case MODE_VECTOR_FLOAT:
+ {
+ int i;
+ machine_mode inner_mode;
+ gcc_assert (GET_CODE (exp) == CONST_VECTOR);
+
+ inner_mode = GET_MODE_INNER (GET_MODE (exp));
+ for (i = 0; i < XVECLEN (exp, 0); i++)
+ s390_output_pool_entry (XVECEXP (exp, 0, i),
+ inner_mode,
+ i == 0
+ ? align
+ : GET_MODE_BITSIZE (inner_mode));
+ }
+ break;
+
default:
gcc_unreachable ();
}
bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
+ if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
+ return false;
+
switch (REGNO_REG_CLASS (regno))
{
+ case VEC_REGS:
+ return ((GET_MODE_CLASS (mode) == MODE_INT
+ && s390_class_max_nregs (VEC_REGS, mode) == 1)
+ || mode == DFmode
+ || s390_vector_mode_supported_p (mode));
+ break;
case FP_REGS:
+ if (TARGET_VX
+ && ((GET_MODE_CLASS (mode) == MODE_INT
+ && s390_class_max_nregs (FP_REGS, mode) == 1)
+ || mode == DFmode
+ || s390_vector_mode_supported_p (mode)))
+ return true;
+
if (REGNO_PAIR_OK (regno, mode))
{
if (mode == SImode || mode == DImode)
int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
+ int reg_size;
+ bool reg_pair_required_p = false;
+
switch (rclass)
{
case FP_REGS:
+ case VEC_REGS:
+ reg_size = TARGET_VX ? 16 : 8;
+
+ /* TF and TD modes would fit into a VR but we put them into a
+ register pair since we do not have 128bit FP instructions on
+ full VRs. */
+ if (TARGET_VX
+ && SCALAR_FLOAT_MODE_P (mode)
+ && GET_MODE_SIZE (mode) >= 16)
+ reg_pair_required_p = true;
+
+ /* Even if complex types would fit into a single FPR/VR we force
+ them into a register pair to deal with the parts more easily.
+ (FIXME: What about complex ints?) */
if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
- return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
- else
- return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
+ reg_pair_required_p = true;
+ break;
case ACCESS_REGS:
- return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
+ reg_size = 4;
+ break;
default:
+ reg_size = UNITS_PER_WORD;
break;
}
- return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (reg_pair_required_p)
+ return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
+
+ return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
+}
+
+/* Return TRUE if changing mode from FROM to TO should not be allowed
+ for register class CLASS. */
+
+int
+s390_cannot_change_mode_class (machine_mode from_mode,
+ machine_mode to_mode,
+ enum reg_class rclass)
+{
+ machine_mode small_mode;
+ machine_mode big_mode;
+
+ if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
+ return 0;
+
+ if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
+ {
+ small_mode = from_mode;
+ big_mode = to_mode;
+ }
+ else
+ {
+ small_mode = to_mode;
+ big_mode = from_mode;
+ }
+
+ /* Values residing in VRs are little-endian style. All modes are
+ placed left-aligned in a VR. This means that we cannot allow
+ switching between modes with differing sizes. Also if the vector
+ facility is available we still place TFmode values in VR register
+ pairs, since the only instructions we have operating on TFmodes
+ only deal with register pairs. Therefore we have to allow DFmode
+ subregs of TFmodes to enable the TFmode splitters. */
+ if (reg_classes_intersect_p (VEC_REGS, rclass)
+ && (GET_MODE_SIZE (small_mode) < 8
+ || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
+ return 1;
+
+ /* Likewise for access registers, since they have only half the
+ word size on 64-bit. */
+ if (reg_classes_intersect_p (ACCESS_REGS, rclass))
+ return 1;
+
+ return 0;
}
/* Return true if we use LRA instead of reload pass. */
return cfun_frame_layout.frame_size == 0;
}
+/* The VX ABI differs for vararg functions. Therefore we need the
+ prototype of the callee to be available when passing vector type
+ values. */
+static const char *
+s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
+{
+ return ((TARGET_VX_ABI
+ && typelist == 0
+ && VECTOR_TYPE_P (TREE_TYPE (val))
+ && (funcdecl == NULL_TREE
+ || (TREE_CODE (funcdecl) == FUNCTION_DECL
+ && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
+ ? N_("Vector argument passed to unprototyped function")
+ : NULL);
+}
+
+
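(A hypothetical example of the situation this hook diagnoses; v4si uses
GCC's vector_size extension.)

    typedef int v4si __attribute__ ((vector_size (16)));

    extern void callee ();  /* unprototyped: empty parameter list */

    void
    caller (v4si x)
    {
      callee (x);  /* diagnosed: vector argument passed to
                      unprototyped function */
    }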
/* Return the size in bytes of a function argument of
type TYPE and/or mode MODE. At least one of TYPE or
MODE must be specified. */
gcc_unreachable ();
}
+/* Return true if a function argument of type TYPE and mode MODE
+ is to be passed in a vector register, if available. */
+
+bool
+s390_function_arg_vector (machine_mode mode, const_tree type)
+{
+ if (!TARGET_VX_ABI)
+ return false;
+
+ if (s390_function_arg_size (mode, type) > 16)
+ return false;
+
+ /* No type info available for some library calls ... */
+ if (!type)
+ return VECTOR_MODE_P (mode);
+
+ /* The ABI says that record types with a single member are treated
+ just like that member would be. */
+ while (TREE_CODE (type) == RECORD_TYPE)
+ {
+ tree field, single = NULL_TREE;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ if (single == NULL_TREE)
+ single = TREE_TYPE (field);
+ else
+ return false;
+ }
+
+ if (single == NULL_TREE)
+ return false;
+ else
+ {
+ /* If the field declaration adds extra bytes due to
+ e.g. padding, this is not accepted as a vector type. */
+ if (int_size_in_bytes (single) <= 0
+ || int_size_in_bytes (single) != int_size_in_bytes (type))
+ return false;
+ type = single;
+ }
+ }
+
+ return VECTOR_TYPE_P (type);
+}
+
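(Illustrating the single-member-record rule above with hypothetical types;
both parameters below qualify for a vector register under the VX ABI since
the wrappers add no padding.)

    typedef int v4si __attribute__ ((vector_size (16)));

    struct wrap   { v4si x; };        /* treated like a plain v4si */
    struct nested { struct wrap w; }; /* the rule applies recursively */

    v4si
    callee (struct wrap a, struct nested b)
    {
      return a.x + b.w.x;
    }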
/* Return true if a function argument of type TYPE and mode MODE
is to be passed in a floating-point register, if available. */
static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
- int size = s390_function_arg_size (mode, type);
- if (size > 8)
+ if (s390_function_arg_size (mode, type) > 8)
return false;
/* Soft-float changes the ABI: no floating-point registers are used. */
bool named ATTRIBUTE_UNUSED)
{
int size = s390_function_arg_size (mode, type);
+
+ if (s390_function_arg_vector (mode, type))
+ return false;
+
if (size > 8)
return true;
if (type)
{
if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
- return 1;
+ return true;
if (TREE_CODE (type) == COMPLEX_TYPE
|| TREE_CODE (type) == VECTOR_TYPE)
- return 1;
+ return true;
}
- return 0;
+ return false;
}
/* Update the data in CUM to advance over an argument of mode MODE and
static void
s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
- const_tree type, bool named ATTRIBUTE_UNUSED)
+ const_tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
- if (s390_function_arg_float (mode, type))
+ if (s390_function_arg_vector (mode, type))
+ {
+ /* We are called for unnamed vector stdarg arguments which are
+ passed on the stack. In this case this hook does not have to
+ do anything since stack arguments are tracked by common
+ code. */
+ if (!named)
+ return;
+ cum->vrs += 1;
+ }
+ else if (s390_function_arg_float (mode, type))
{
cum->fprs += 1;
}
static rtx
s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
- const_tree type, bool named ATTRIBUTE_UNUSED)
+ const_tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
- if (s390_function_arg_float (mode, type))
+
+ if (s390_function_arg_vector (mode, type))
+ {
+ /* Vector arguments being part of the ellipsis are passed on the
+ stack. */
+ if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
+ return NULL_RTX;
+
+ return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
+ }
+ else if (s390_function_arg_float (mode, type))
{
if (cum->fprs + 1 > FP_ARG_NUM_REG)
- return 0;
+ return NULL_RTX;
else
return gen_rtx_REG (mode, cum->fprs + 16);
}
int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
- return 0;
+ return NULL_RTX;
else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
return gen_rtx_REG (mode, cum->gprs + 2);
else if (n_gprs == 2)
|| TREE_CODE (type) == REAL_TYPE)
return int_size_in_bytes (type) > 8;
+ /* vector types which fit into a VR. */
+ if (TARGET_VX_ABI
+ && VECTOR_TYPE_P (type)
+ && int_size_in_bytes (type) <= 16)
+ return false;
+
/* Aggregates and similar constructs are always returned
in memory. */
if (AGGREGATE_TYPE_P (type)
|| TREE_CODE (type) == COMPLEX_TYPE
- || TREE_CODE (type) == VECTOR_TYPE)
+ || VECTOR_TYPE_P (type))
return true;
/* ??? We get called on all sorts of random stuff from
const_tree fntype_or_decl,
bool outgoing ATTRIBUTE_UNUSED)
{
+ /* For vector return types it is important to use the RET_TYPE
+ argument whenever available since the middle-end might have
+ changed the mode to a scalar mode. */
+ bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
+ || (!ret_type && VECTOR_MODE_P (mode)));
+
/* For normal functions perform the promotion as
promote_function_mode would do. */
if (ret_type)
fntype_or_decl, 1);
}
- gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
- gcc_assert (GET_MODE_SIZE (mode) <= 8);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
+ || SCALAR_FLOAT_MODE_P (mode)
+ || (TARGET_VX_ABI && vector_ret_type_p));
+ gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
- if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
+ if (TARGET_VX_ABI && vector_ret_type_p)
+ return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
+ else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
return gen_rtx_REG (mode, 16);
else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
|| UNITS_PER_LONG == UNITS_PER_WORD)
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- /* Find the overflow area. */
+ /* Find the overflow area.
+ FIXME: This currently is too pessimistic when the vector ABI is
+ enabled. In that case we *always* set up the overflow area
+ pointer. */
if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
- || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
+ || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
+ || TARGET_VX_ABI)
{
t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
ret = args.reg_save_area[args.gpr+8]
else
ret = *args.overflow_arg_area++;
+ } else if (vector value) {
+ ret = *args.overflow_arg_area;
+ args.overflow_arg_area += size / 8;
} else if (float value) {
if (args.fgpr < 2)
ret = args.reg_save_area[args.fpr+64]
tree f_gpr, f_fpr, f_ovf, f_sav;
tree gpr, fpr, ovf, sav, reg, t, u;
int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
- tree lab_false, lab_over, addr;
+ tree lab_false, lab_over;
+ tree addr = create_tmp_var (ptr_type_node, "addr");
+ bool left_align_p; /* Set if a value < UNITS_PER_LONG is left-aligned
+ within its stack slot. */
f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
f_fpr = DECL_CHAIN (f_gpr);
sav_scale = UNITS_PER_LONG;
size = UNITS_PER_LONG;
max_reg = GP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
+ }
+ else if (s390_function_arg_vector (TYPE_MODE (type), type))
+ {
+ if (TARGET_DEBUG_ARG)
+ {
+ fprintf (stderr, "va_arg: vector type");
+ debug_tree (type);
+ }
+
+ indirect_p = 0;
+ reg = NULL_TREE;
+ n_reg = 0;
+ sav_ofs = 0;
+ sav_scale = 8;
+ max_reg = 0;
+ left_align_p = true;
}
else if (s390_function_arg_float (TYPE_MODE (type), type))
{
sav_ofs = 16 * UNITS_PER_LONG;
sav_scale = 8;
max_reg = FP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
}
else
{
sav_scale = UNITS_PER_LONG;
max_reg = GP_ARG_NUM_REG - n_reg;
+ left_align_p = false;
}
/* Pull the value out of the saved registers ... */
- lab_false = create_artificial_label (UNKNOWN_LOCATION);
- lab_over = create_artificial_label (UNKNOWN_LOCATION);
- addr = create_tmp_var (ptr_type_node, "addr");
+ if (reg != NULL_TREE)
+ {
+ /*
+ if (reg > ((typeof (reg))max_reg))
+ goto lab_false;
- t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
- t = build2 (GT_EXPR, boolean_type_node, reg, t);
- u = build1 (GOTO_EXPR, void_type_node, lab_false);
- t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
- gimplify_and_add (t, pre_p);
+ addr = sav + sav_ofs + reg * sav_scale;
- t = fold_build_pointer_plus_hwi (sav, sav_ofs);
- u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
- fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
- t = fold_build_pointer_plus (t, u);
+ goto lab_over;
- gimplify_assign (addr, t, pre_p);
+ lab_false:
+ */
+
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
+ t = build2 (GT_EXPR, boolean_type_node, reg, t);
+ u = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
+ gimplify_and_add (t, pre_p);
- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+ t = fold_build_pointer_plus_hwi (sav, sav_ofs);
+ u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
+ t = fold_build_pointer_plus (t, u);
- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ gimplify_assign (addr, t, pre_p);
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ }
/* ... Otherwise out of the overflow area. */
t = ovf;
- if (size < UNITS_PER_LONG)
+ if (size < UNITS_PER_LONG && !left_align_p)
t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
- t = fold_build_pointer_plus_hwi (t, size);
+ if (size < UNITS_PER_LONG && left_align_p)
+ t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
+ else
+ t = fold_build_pointer_plus_hwi (t, size);
+
gimplify_assign (ovf, t, pre_p);
- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+ if (reg != NULL_TREE)
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
/* Increment register save count. */
- u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
- fold_convert (TREE_TYPE (reg), size_int (n_reg)));
- gimplify_and_add (u, pre_p);
+ if (n_reg > 0)
+ {
+ u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
+ fold_convert (TREE_TYPE (reg), size_int (n_reg)));
+ gimplify_and_add (u, pre_p);
+ }
if (indirect_p)
{
mode = TYPE_MODE (type);
gcc_assert (mode);
+ /* We assume that in the target function all parameters are
+ named. This only affects the vector argument registers, none
+ of which are call-saved. */
if (pass_by_reference (&cum_v, mode, type, true))
{
mode = Pmode;
type = build_pointer_type (type);
}
- parm_rtx = s390_function_arg (cum, mode, type, 0);
+ parm_rtx = s390_function_arg (cum, mode, type, true);
- s390_function_arg_advance (cum, mode, type, 0);
+ s390_function_arg_advance (cum, mode, type, true);
if (!parm_rtx)
continue;
for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
call_used_regs[i] = fixed_regs[i] = 1;
}
+
+ /* Disable v16 - v31 for non-vector target. */
+ if (!TARGET_VX)
+ {
+ for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
+ fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
+ }
}
/* Corresponding function to eh_return expander. */
#undef FPC_DXC_SHIFT
}
+/* Return the vector mode to be used for inner mode MODE when doing
+ vectorization. */
+static machine_mode
+s390_preferred_simd_mode (machine_mode mode)
+{
+ if (TARGET_VX)
+ switch (mode)
+ {
+ case DFmode:
+ return V2DFmode;
+ case DImode:
+ return V2DImode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+ default:;
+ }
+ return word_mode;
+}
+
+/* Our hardware does not require vectors to be strictly aligned. */
+static bool
+s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
+ int misalignment ATTRIBUTE_UNUSED,
+ bool is_packed ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
+/* The vector ABI requires vector types to be aligned on an 8 byte
+ boundary (our stack alignment). However, we allow this to be
+ overridden by the user, although this definitely breaks the ABI. */
+static HOST_WIDE_INT
+s390_vector_alignment (const_tree type)
+{
+ if (!TARGET_VX_ABI)
+ return default_vector_alignment (type);
+
+ if (TYPE_USER_ALIGN (type))
+ return TYPE_ALIGN (type);
+
+ return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
+}
+
+
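(As an illustration of the expected effect: alignments are in bits inside
the hook, given in bytes here; this assumes a VX-ABI target and is not
tested output.)

    typedef int v2si __attribute__ ((vector_size (8)));
    typedef int v4si __attribute__ ((vector_size (16)));

    /* Expected (assumption, per the comment above):
       _Alignof (v2si) == 8   -- MIN (64, 64) bits: natural alignment
       _Alignof (v4si) == 8   -- MIN (64, 128) bits: capped at 8 bytes

       typedef v4si v4si_a16 __attribute__ ((aligned (16)));
       _Alignof (v4si_a16) == 16 -- user override wins, breaking the ABI.  */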
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
#endif
+#undef TARGET_DWARF_FRAME_REG_MODE
+#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
+
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
+
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
+#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
+#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
+
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
+
+#undef TARGET_VECTOR_ALIGNMENT
+#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
#define STACK_SIZE_MODE (Pmode)
+/* Vector arguments are left-justified when placed on the stack during
+ parameter passing. */
+#define FUNCTION_ARG_PADDING(MODE, TYPE) \
+ (s390_function_arg_vector ((MODE), (TYPE)) \
+ ? upward \
+ : DEFAULT_FUNCTION_ARG_PADDING ((MODE), (TYPE)))
+
#ifndef IN_LIBGCC2
/* Width of a word, in units (bytes). */
Reg 35: Return address pointer
Registers 36 and 37 are mapped to access registers
- 0 and 1, used to implement thread-local storage. */
+ 0 and 1, used to implement thread-local storage.
+
+ Reg 38-53: Vector registers v16-v31 */
-#define FIRST_PSEUDO_REGISTER 38
+#define FIRST_PSEUDO_REGISTER 54
/* Standard register usage. */
#define GENERAL_REGNO_P(N) ((int)(N) >= 0 && (N) < 16)
#define CC_REGNO_P(N) ((N) == 33)
#define FRAME_REGNO_P(N) ((N) == 32 || (N) == 34 || (N) == 35)
#define ACCESS_REGNO_P(N) ((N) == 36 || (N) == 37)
+#define VECTOR_NOFP_REGNO_P(N) ((N) >= 38 && (N) <= 53)
+#define VECTOR_REGNO_P(N) (FP_REGNO_P (N) || VECTOR_NOFP_REGNO_P (N))
#define GENERAL_REG_P(X) (REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
#define ADDR_REG_P(X) (REG_P (X) && ADDR_REGNO_P (REGNO (X)))
#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
#define FRAME_REG_P(X) (REG_P (X) && FRAME_REGNO_P (REGNO (X)))
#define ACCESS_REG_P(X) (REG_P (X) && ACCESS_REGNO_P (REGNO (X)))
+#define VECTOR_NOFP_REG_P(X) (REG_P (X) && VECTOR_NOFP_REGNO_P (REGNO (X)))
+#define VECTOR_REG_P(X) (REG_P (X) && VECTOR_REGNO_P (REGNO (X)))
/* Set up fixed registers and calling convention:
On 31-bit, FPRs 18-19 are call-clobbered;
on 64-bit, FPRs 24-31 are call-clobbered.
- The remaining FPRs are call-saved. */
+ The remaining FPRs are call-saved.
+
+ All non-FP vector registers (v16-v31) are call-clobbered. */
#define FIXED_REGISTERS \
{ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
1, 1, 1, 1, \
- 1, 1 }
+ 1, 1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0 }
#define CALL_USED_REGISTERS \
{ 1, 1, 1, 1, \
1, 1, 1, 1, \
1, 1, 1, 1, \
1, 1, 1, 1, \
- 1, 1 }
+ 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1, \
+ 1, 1, 1, 1 }
#define CALL_REALLY_USED_REGISTERS \
-{ 1, 1, 1, 1, \
+{ 1, 1, 1, 1, /* r0 - r15 */ \
1, 1, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
+ 1, 1, 1, 1, /* f0 (16) - f15 (31) */ \
1, 1, 1, 1, \
1, 1, 1, 1, \
1, 1, 1, 1, \
- 1, 1, 1, 1, \
+ 1, 1, 1, 1, /* arg, cc, fp, ret addr */ \
+ 0, 0, /* a0 (36), a1 (37) */ \
+ 1, 1, 1, 1, /* v16 (38) - v23 (45) */ \
1, 1, 1, 1, \
- 0, 0 }
+ 1, 1, 1, 1, /* v24 (46) - v31 (53) */ \
+ 1, 1, 1, 1 }
/* Preferred register allocation order. */
-#define REG_ALLOC_ORDER \
-{ 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \
- 16, 17, 18, 19, 20, 21, 22, 23, \
- 24, 25, 26, 27, 28, 29, 30, 31, \
- 15, 32, 33, 34, 35, 36, 37 }
+#define REG_ALLOC_ORDER \
+ { 1, 2, 3, 4, 5, 0, 12, 11, 10, 9, 8, 7, 6, 14, 13, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 24, 25, 26, 27, 28, 29, 30, 31, \
+ 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, \
+ 15, 32, 33, 34, 35, 36, 37 }
/* Fitting values into registers. */
but conforms to the 31-bit ABI, GPRs can hold 8 bytes;
the ABI guarantees only that the lower 4 bytes are
saved across calls, however. */
-#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
- (!TARGET_64BIT && TARGET_ZARCH \
- && GET_MODE_SIZE (MODE) > 4 \
- && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32))
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
+ ((!TARGET_64BIT && TARGET_ZARCH \
+ && GET_MODE_SIZE (MODE) > 4 \
+ && (((REGNO) >= 6 && (REGNO) <= 15) || (REGNO) == 32)) \
+ || (TARGET_VX \
+ && GET_MODE_SIZE (MODE) > 8 \
+ && (((TARGET_64BIT && (REGNO) >= 24 && (REGNO) <= 31)) \
+ || (!TARGET_64BIT && ((REGNO) == 18 || (REGNO) == 19)))))
/* Maximum number of registers to represent a value of mode MODE
in a register of class CLASS. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
s390_class_max_nregs ((CLASS), (MODE))
-/* If a 4-byte value is loaded into a FPR, it is placed into the
- *upper* half of the register, not the lower. Therefore, we
- cannot use SUBREGs to switch between modes in FP registers.
- Likewise for access registers, since they have only half the
- word size on 64-bit. */
#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
- (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \
- ? ((reg_classes_intersect_p (FP_REGS, CLASS) \
- && (GET_MODE_SIZE (FROM) < 8 || GET_MODE_SIZE (TO) < 8)) \
- || reg_classes_intersect_p (ACCESS_REGS, CLASS)) : 0)
+ s390_cannot_change_mode_class ((FROM), (TO), (CLASS))
/* Register classes. */
NO_REGS, CC_REGS, ADDR_REGS, GENERAL_REGS, ACCESS_REGS,
ADDR_CC_REGS, GENERAL_CC_REGS,
FP_REGS, ADDR_FP_REGS, GENERAL_FP_REGS,
+ VEC_REGS, ADDR_VEC_REGS, GENERAL_VEC_REGS,
ALL_REGS, LIM_REG_CLASSES
};
#define N_REG_CLASSES (int) LIM_REG_CLASSES
#define REG_CLASS_NAMES \
{ "NO_REGS", "CC_REGS", "ADDR_REGS", "GENERAL_REGS", "ACCESS_REGS", \
"ADDR_CC_REGS", "GENERAL_CC_REGS", \
- "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", "ALL_REGS" }
+ "FP_REGS", "ADDR_FP_REGS", "GENERAL_FP_REGS", \
+ "VEC_REGS", "ADDR_VEC_REGS", "GENERAL_VEC_REGS", \
+ "ALL_REGS" }
/* Class -> register mapping. */
-#define REG_CLASS_CONTENTS \
-{ \
+#define REG_CLASS_CONTENTS \
+{ \
{ 0x00000000, 0x00000000 }, /* NO_REGS */ \
{ 0x00000000, 0x00000002 }, /* CC_REGS */ \
{ 0x0000fffe, 0x0000000d }, /* ADDR_REGS */ \
{ 0xffff0000, 0x00000000 }, /* FP_REGS */ \
{ 0xfffffffe, 0x0000000d }, /* ADDR_FP_REGS */ \
{ 0xffffffff, 0x0000000d }, /* GENERAL_FP_REGS */ \
- { 0xffffffff, 0x0000003f }, /* ALL_REGS */ \
+ { 0xffff0000, 0x003fffc0 }, /* VEC_REGS */ \
+ { 0xfffffffe, 0x003fffcd }, /* ADDR_VEC_REGS */ \
+ { 0xffffffff, 0x003fffcd }, /* GENERAL_VEC_REGS */ \
+ { 0xffffffff, 0x003fffff }, /* ALL_REGS */ \
}
/* In some case register allocation order is not enough for IRA to
#define REGNO_OK_FOR_BASE_P(REGNO) REGNO_OK_FOR_INDEX_P (REGNO)
-/* We need secondary memory to move data between GPRs and FPRs. With
- DFP the ldgr lgdr instructions are available. But these
- instructions do not handle GPR pairs so it is not possible for 31
- bit. */
-#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
- ((CLASS1) != (CLASS2) \
- && ((CLASS1) == FP_REGS || (CLASS2) == FP_REGS) \
- && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8))
+/* We need secondary memory to move data between GPRs and FPRs.
+
+ - With DFP the ldgr and lgdr instructions are available. Due to the
+ different alignment we cannot use them for SFmode. On 31 bit a
+ 64-bit value in a GPR would be a register pair, so there we still
+ need to go via memory.
+
+ - With z13 we can do the SF/SImode moves with vlgvf. Due to the
+ overlap of FPRs and VRs we still disallow TF/TD modes in full
+ VRs, so on z13 these moves also continue to go via memory.
+
+ FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ (((reg_classes_intersect_p (CLASS1, VEC_REGS) \
+ && reg_classes_intersect_p (CLASS2, GENERAL_REGS)) \
+ || (reg_classes_intersect_p (CLASS1, GENERAL_REGS) \
+ && reg_classes_intersect_p (CLASS2, VEC_REGS))) \
+ && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (MODE) != 8) \
+ && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (MODE) \
+ && GET_MODE_SIZE (MODE) > 8)))
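As a user-level illustration of the GPR <-> FPR/VR moves this macro governs
(a minimal sketch; the expected instruction selection is an assumption, not
taken from the patch):

    #include <string.h>

    /* Bit-punning a 64-bit GPR value into an FPR.  With DFP on a
       64-bit target this is expected to become a single ldgr; in the
       remaining cases SECONDARY_MEMORY_NEEDED forces the value through
       a stack slot.  */
    double
    bits_to_double (unsigned long long x)
    {
      double d;
      memcpy (&d, &x, sizeof d);
      return d;
    }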
/* Get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
because the movsi and movsf patterns don't handle r/f moves. */
/* Let the assembler generate debug line info. */
#define DWARF2_ASM_LINE_DEBUG_INFO 1
+/* Define the dwarf register mapping.
+ v16-v31 -> 68-83
+ rX -> X otherwise */
+#define DBX_REGISTER_NUMBER(regno) \
+ ((regno >= 38 && regno <= 53) ? regno + 30 : regno)
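The mapping is simple enough to model directly; a sketch (the helper name is
hypothetical, not part of the patch):

    /* Mirrors DBX_REGISTER_NUMBER: internal regnos 38-53 (v16-v31) map
       to DWARF numbers 68-83; every other regno maps to itself.  */
    static int
    s390_dwarf_regno (int regno)
    {
      return (regno >= 38 && regno <= 53) ? regno + 30 : regno;
    }

    /* e.g. s390_dwarf_regno (38) == 68, s390_dwarf_regno (15) == 15 */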
/* Frame registers. */
{
int gprs; /* gpr so far */
int fprs; /* fpr so far */
+ int vrs; /* vr so far */
}
CUMULATIVE_ARGS;
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, NN, N_NAMED_ARGS) \
- ((CUM).gprs=0, (CUM).fprs=0)
+ ((CUM).gprs=0, (CUM).fprs=0, (CUM).vrs=0)
+
+#define FIRST_VEC_ARG_REGNO 46
+#define LAST_VEC_ARG_REGNO 53
/* Arguments can be placed in general registers 2 to 6, or in floating
point registers 0 and 2 for 31 bit and fprs 0, 2, 4 and 6 for 64
bit. */
-#define FUNCTION_ARG_REGNO_P(N) (((N) >=2 && (N) <7) || \
- (N) == 16 || (N) == 17 || (TARGET_64BIT && ((N) == 18 || (N) == 19)))
+#define FUNCTION_ARG_REGNO_P(N) \
+ (((N) >=2 && (N) < 7) || (N) == 16 || (N) == 17 \
+ || (TARGET_64BIT && ((N) == 18 || (N) == 19)) \
+ || (TARGET_VX && ((N) >= FIRST_VEC_ARG_REGNO && (N) <= LAST_VEC_ARG_REGNO)))
-/* Only gpr 2 and fpr 0 are ever used as return registers. */
-#define FUNCTION_VALUE_REGNO_P(N) ((N) == 2 || (N) == 16)
+/* Only gpr 2, fpr 0, and v24 are ever used as return registers. */
+#define FUNCTION_VALUE_REGNO_P(N) \
+ ((N) == 2 || (N) == 16 \
+ || (TARGET_VX && (N) == FIRST_VEC_ARG_REGNO))
/* Function entry and exit. */
/* How to refer to registers in assembler output. This sequence is
indexed by compiler's hard-register-number (see above). */
#define REGISTER_NAMES \
-{ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
- "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \
- "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \
- "%ap", "%cc", "%fp", "%rp", "%a0", "%a1" \
-}
+ { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", \
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", \
+ "%f0", "%f2", "%f4", "%f6", "%f1", "%f3", "%f5", "%f7", \
+ "%f8", "%f10", "%f12", "%f14", "%f9", "%f11", "%f13", "%f15", \
+ "%ap", "%cc", "%fp", "%rp", "%a0", "%a1", \
+ "%v16", "%v18", "%v20", "%v22", "%v17", "%v19", "%v21", "%v23", \
+ "%v24", "%v26", "%v28", "%v30", "%v25", "%v27", "%v29", "%v31" \
+ }
+
+#define ADDITIONAL_REGISTER_NAMES \
+ { { "v0", 16 }, { "v2", 17 }, { "v4", 18 }, { "v6", 19 }, \
+ { "v1", 20 }, { "v3", 21 }, { "v5", 22 }, { "v7", 23 }, \
+ { "v8", 24 }, { "v10", 25 }, { "v12", 26 }, { "v14", 27 }, \
+ { "v9", 28 }, { "v11", 29 }, { "v13", 30 }, { "v15", 31 } };
/* Print operand X (an rtx) in assembler syntax to file FILE. */
#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE)
#define SYMBOL_REF_NOT_NATURALLY_ALIGNED_P(X) \
((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_NOT_NATURALLY_ALIGNED))
+/* Check whether integer displacement is in range for a short displacement. */
+#define SHORT_DISP_IN_RANGE(d) ((d) >= 0 && (d) <= 4095)
+
/* Check whether integer displacement is in range. */
#define DISP_IN_RANGE(d) \
(TARGET_LONG_DISPLACEMENT? ((d) >= -524288 && (d) <= 524287) \
- : ((d) >= 0 && (d) <= 4095))
+ : SHORT_DISP_IN_RANGE(d))
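Stated as plain C for clarity (hypothetical helper names; the bounds are
taken from the macros above):

    /* Short displacements are unsigned 12 bit, long displacements are
       signed 20 bit.  The new vector memory instructions only support
       the short form, hence the separate macro.  */
    static int short_disp_ok (long d) { return d >= 0 && d <= 4095; }
    static int long_disp_ok (long d)  { return d >= -524288 && d <= 524287; }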
/* Reads can reuse write prefetches, used by tree-ssa-prefetch-loops.c. */
#define READ_CAN_USE_WRITE_PREFETCH 1
extern const int processor_flags_table[];
-#endif
+
+/* The truth element value for vector comparisons. Our instructions
+ always generate -1 in that case. */
+#define VECTOR_STORE_FLAG_VALUE(MODE) CONSTM1_RTX (GET_MODE_INNER (MODE))
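With GNU C vector extensions the truth value is directly observable; a
minimal sketch (the typedef is illustrative):

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Each true element of the comparison result is all ones (-1),
       each false element is 0, matching VECTOR_STORE_FLAG_VALUE.  */
    v4si
    vec_eq (v4si a, v4si b)
    {
      return a == b;   /* {1,2,3,4} == {1,0,3,0} -> {-1,0,-1,0} */
    }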
+
+#endif /* S390_H */
UNSPEC_FPINT_CEIL
UNSPEC_FPINT_NEARBYINT
UNSPEC_FPINT_RINT
- ])
+
+ ; Vector
+ UNSPEC_VEC_EXTRACT
+ UNSPEC_VEC_SET
+ UNSPEC_VEC_PERM
+ UNSPEC_VEC_SRLB
+ UNSPEC_VEC_GENBYTEMASK
+ UNSPEC_VEC_VSUM
+ UNSPEC_VEC_VSUMG
+ UNSPEC_VEC_SMULT_EVEN
+ UNSPEC_VEC_UMULT_EVEN
+ UNSPEC_VEC_SMULT_ODD
+ UNSPEC_VEC_UMULT_ODD
+ UNSPEC_VEC_LOAD_LEN
+ UNSPEC_VEC_VFENE
+ UNSPEC_VEC_VFENECC
+])
;;
;; UNSPEC_VOLATILE usage
(FPR13_REGNUM 30)
(FPR14_REGNUM 27)
(FPR15_REGNUM 31)
+ (VR0_REGNUM 16)
+ (VR16_REGNUM 38)
+ (VR23_REGNUM 45)
+ (VR24_REGNUM 46)
+ (VR31_REGNUM 53)
])
;;
;; Used to determine defaults for length and other attribute values.
(define_attr "op_type"
- "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS"
+ "NN,E,RR,RRE,RX,RS,RSI,RI,SI,S,SS,SSE,RXE,RSE,RIL,RIE,RXY,RSY,SIY,RRF,RRR,SIL,RRS,RIS,VRI,VRR,VRS,VRV,VRX"
(const_string "NN"))
;; Instruction type attribute used for scheduling.
;; Iterators
+(define_mode_iterator ALL [TI DI SI HI QI TF DF SF TD DD SD V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF V2SF V4SF V1TI V1DF V2DF V1TF])
+
;; These mode iterators allow floating point patterns to be generated from the
;; same template.
(define_mode_iterator FP_ALL [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")
(SD "TARGET_HARD_DFP")])
(define_mode_iterator FP [TF DF SF (TD "TARGET_HARD_DFP") (DD "TARGET_HARD_DFP")])
-(define_mode_iterator FPALL [TF DF SF TD DD SD])
(define_mode_iterator BFP [TF DF SF])
(define_mode_iterator DFP [TD DD])
(define_mode_iterator DFP_ALL [TD DD SD])
;; This mode iterator allows the integer patterns to be defined from the
;; same template.
(define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
-(define_mode_iterator INTALL [TI DI SI HI QI])
(define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI])
;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
;; Allow return and simple_return to be defined from a single template.
(define_code_iterator ANY_RETURN [return simple_return])
+(include "vector.md")
+
;;
;;- Compare instructions.
;;
; movti instruction pattern(s).
;
+; FIXME: More constants are possible by enabling jxx, jyy constraints
+; for TImode (use double-int for the calculations)
(define_insn "movti"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,d,o")
- (match_operand:TI 1 "general_operand" "QS,d,dPRT,d"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,QS,v, v, v,v,d, v,QR, d,o")
+ (match_operand:TI 1 "general_operand" "QS, d,v,j00,jm1,d,v,QR, v,dPRT,d"))]
"TARGET_ZARCH"
"@
lmg\t%0,%N0,%S1
stmg\t%1,%N1,%S0
+ vlr\t%v0,%v1
+ vzero\t%v0
+ vone\t%v0
+ vlvgp\t%v0,%1,%N1
+ #
+ vl\t%v0,%1
+ vst\t%v1,%0
#
#"
- [(set_attr "op_type" "RSY,RSY,*,*")
- (set_attr "type" "lm,stm,*,*")])
+ [(set_attr "op_type" "RSY,RSY,VRR,VRI,VRI,VRR,*,VRX,VRX,*,*")
+ (set_attr "type" "lm,stm,*,*,*,*,*,*,*,*,*")
+ (set_attr "cpu_facility" "*,*,vec,vec,vec,vec,vec,vec,vec,*,*")])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
operands[5] = operand_subword (operands[1], 0, 0, TImode);
})
+; Use part of the TImode target reg to perform the address
+; calculation. If the TImode value is supposed to be copied into a VR
+; this splitter is not necessary.
(define_split
[(set (match_operand:TI 0 "register_operand" "")
(match_operand:TI 1 "memory_operand" ""))]
"TARGET_ZARCH && reload_completed
+ && !VECTOR_REG_P (operands[0])
&& !s_operand (operands[1], VOIDmode)"
[(set (match_dup 0) (match_dup 1))]
{
})
+; Split a VR -> GPR TImode move into two "load GPR from VR element"
+; operations. The higher order bits are done as a plain DImode move
+; while the lower part goes via vec extract. Both will end up as vlgvg.
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (match_operand:TI 1 "register_operand" ""))]
+ "TARGET_VX && reload_completed
+ && GENERAL_REG_P (operands[0])
+ && VECTOR_REG_P (operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (unspec:DI [(match_dup 5) (const_int 1)]
+ UNSPEC_VEC_EXTRACT))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, TImode);
+ operands[3] = operand_subword (operands[0], 1, 0, TImode);
+ operands[4] = gen_rtx_REG (DImode, REGNO (operands[1]));
+ operands[5] = gen_rtx_REG (V2DImode, REGNO (operands[1]));
+})
+
;
; Patterns used for secondary reloads
;
; Unfortunately there is no such variant for QI, TI and FP mode moves.
; These patterns are also used for unaligned SI and DI accesses.
-(define_expand "reload<INTALL:mode><P:mode>_tomem_z10"
- [(parallel [(match_operand:INTALL 0 "memory_operand" "")
- (match_operand:INTALL 1 "register_operand" "=d")
- (match_operand:P 2 "register_operand" "=&a")])]
+(define_expand "reload<ALL:mode><P:mode>_tomem_z10"
+ [(parallel [(match_operand:ALL 0 "memory_operand" "")
+ (match_operand:ALL 1 "register_operand" "=d")
+ (match_operand:P 2 "register_operand" "=&a")])]
"TARGET_Z10"
{
s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
DONE;
})
-(define_expand "reload<INTALL:mode><P:mode>_toreg_z10"
- [(parallel [(match_operand:INTALL 0 "register_operand" "=d")
- (match_operand:INTALL 1 "memory_operand" "")
- (match_operand:P 2 "register_operand" "=a")])]
- "TARGET_Z10"
-{
- s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
- DONE;
-})
-
-(define_expand "reload<FPALL:mode><P:mode>_tomem_z10"
- [(parallel [(match_operand:FPALL 0 "memory_operand" "")
- (match_operand:FPALL 1 "register_operand" "=d")
- (match_operand:P 2 "register_operand" "=&a")])]
- "TARGET_Z10"
-{
- s390_reload_symref_address (operands[1], operands[0], operands[2], 1);
- DONE;
-})
-
-(define_expand "reload<FPALL:mode><P:mode>_toreg_z10"
- [(parallel [(match_operand:FPALL 0 "register_operand" "=d")
- (match_operand:FPALL 1 "memory_operand" "")
- (match_operand:P 2 "register_operand" "=a")])]
+(define_expand "reload<ALL:mode><P:mode>_toreg_z10"
+ [(parallel [(match_operand:ALL 0 "register_operand" "=d")
+ (match_operand:ALL 1 "memory_operand" "")
+ (match_operand:P 2 "register_operand" "=a")])]
"TARGET_Z10"
{
s390_reload_symref_address (operands[0], operands[1], operands[2], 0);
DONE;
})
-; Handles assessing a non-offsetable memory address
+; Not all the indirect memory access instructions support the full
+; format (long disp + index + base). So whenever a move from/to such
+; an address is required and the instruction cannot deal with it we
+; first load the address into a scratch register and use this as the
+; new base register.
+; This in particular is used for:
+; - non-offsetable memory accesses for multiword moves
+; - full vector reg moves with long displacements
-(define_expand "reload<mode>_nonoffmem_in"
+(define_expand "reload<mode>_la_in"
[(parallel [(match_operand 0 "register_operand" "")
(match_operand 1 "" "")
(match_operand:P 2 "register_operand" "=&a")])]
DONE;
})
-(define_expand "reload<mode>_nonoffmem_out"
+(define_expand "reload<mode>_la_out"
[(parallel [(match_operand 0 "" "")
(match_operand 1 "register_operand" "")
(match_operand:P 2 "register_operand" "=&a")])]
(define_insn "*movdi_64"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=d,d,d,d,d,d,d,d,f,d,d,d,d,d,
- RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t")
+ "=d, d, d, d, d, d, d, d,f,d,d,d,d, d,RT,!*f,!*f,!*f,!R,!T,b,Q,d,t,Q,t,v,v,v,d, v,QR")
(match_operand:DI 1 "general_operand"
- "K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT,
- d,*f,R,T,*f,*f,d,K,t,d,t,Q"))]
+ " K,N0HD0,N1HD0,N2HD0,N3HD0,Os,N0SD0,N1SD0,d,f,L,b,d,RT, d, *f, R, T,*f,*f,d,K,t,d,t,Q,K,v,d,v,QR, v"))]
"TARGET_ZARCH"
"@
lghi\t%0,%h1
#
#
stam\t%1,%N1,%S0
- lam\t%0,%N0,%S1"
+ lam\t%0,%N0,%S1
+ vleig\t%v0,%h1,0
+ vlr\t%v0,%v1
+ vlvgg\t%v0,%1,0
+ vlgvg\t%0,%v1,0
+ vleg\t%v0,%1,0
+ vsteg\t%v1,%0,0"
[(set_attr "op_type" "RI,RI,RI,RI,RI,RIL,RIL,RIL,RRE,RRE,RXY,RIL,RRE,RXY,
- RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS")
+ RXY,RR,RX,RXY,RX,RXY,RIL,SIL,*,*,RS,RS,VRI,VRR,VRS,VRS,VRX,VRX")
(set_attr "type" "*,*,*,*,*,*,*,*,floaddf,floaddf,la,larl,lr,load,store,
- floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,
- *,*")
+ floaddf,floaddf,floaddf,fstoredf,fstoredf,larl,*,*,*,*,
+ *,*,*,*,*,*,*")
(set_attr "cpu_facility" "*,*,*,*,*,extimm,extimm,extimm,dfp,dfp,longdisp,
z10,*,*,*,*,*,longdisp,*,longdisp,
- z10,z10,*,*,*,*")
+ z10,z10,*,*,*,*,vec,vec,vec,vec,vec,vec")
(set_attr "z10prop" "z10_fwd_A1,
z10_fwd_E1,
z10_fwd_E1,
*,
*,
*,
- *")
+ *,*,*,*,*,*,*")
])
(define_split
(define_insn "*movsi_zarch"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=d,d,d,d,d,d,d,d,d,R,T,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t")
+ "=d, d, d, d,d,d,d,d,d,R,T,!*f,!*f,!*f,!*f,!*f,!R,!T,d,t,Q,b,Q,t,v,v,v,d, v,QR")
(match_operand:SI 1 "general_operand"
- "K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d,*f,R,T,*f,*f,t,d,t,d,K,Q"))]
+ " K,N0HS0,N1HS0,Os,L,b,d,R,T,d,d, *f, *f, R, R, T,*f,*f,t,d,t,d,K,Q,K,v,d,v,QR, v"))]
"TARGET_ZARCH"
"@
lhi\t%0,%h1
ly\t%0,%1
st\t%1,%0
sty\t%1,%0
+ lder\t%0,%1
ler\t%0,%1
+ lde\t%0,%1
le\t%0,%1
ley\t%0,%1
ste\t%1,%0
stam\t%1,%1,%S0
strl\t%1,%0
mvhi\t%0,%1
- lam\t%0,%0,%S1"
+ lam\t%0,%0,%S1
+ vleif\t%v0,%h1,0
+ vlr\t%v0,%v1
+ vlvgf\t%v0,%1,0
+ vlgvf\t%0,%v1,0
+ vlef\t%v0,%1,0
+ vstef\t%v1,%0,0"
[(set_attr "op_type" "RI,RI,RI,RIL,RXY,RIL,RR,RX,RXY,RX,RXY,
- RR,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS")
+ RRE,RR,RXE,RX,RXY,RX,RXY,RRE,RRE,RS,RIL,SIL,RS,VRI,VRR,VRS,VRS,VRX,VRX")
(set_attr "type" "*,
*,
*,
floadsf,
floadsf,
floadsf,
+ floadsf,
+ floadsf,
fstoresf,
fstoresf,
*,
*,
larl,
*,
- *")
+ *,*,*,*,*,*,*")
(set_attr "cpu_facility" "*,*,*,extimm,longdisp,z10,*,*,longdisp,*,longdisp,
- *,*,longdisp,*,longdisp,*,*,*,z10,z10,*")
+ vec,*,vec,*,longdisp,*,longdisp,*,*,*,z10,z10,*,vec,vec,vec,vec,vec,vec")
(set_attr "z10prop" "z10_fwd_A1,
z10_fwd_E1,
z10_fwd_E1,
*,
*,
*,
+ *,
+ *,
z10_super_E1,
z10_super,
*,
z10_rec,
z10_super,
- *")])
+ *,*,*,*,*,*,*")])
(define_insn "*movsi_esa"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!R,d,t,Q,t")
- (match_operand:SI 1 "general_operand" "K,d,R,d,*f,R,*f,t,d,t,Q"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,d,R,!*f,!*f,!*f,!*f,!R,d,t,Q,t")
+ (match_operand:SI 1 "general_operand" "K,d,R,d, *f, *f, R, R,*f,t,d,t,Q"))]
"!TARGET_ZARCH"
"@
lhi\t%0,%h1
lr\t%0,%1
l\t%0,%1
st\t%1,%0
+ lder\t%0,%1
ler\t%0,%1
+ lde\t%0,%1
le\t%0,%1
ste\t%1,%0
ear\t%0,%1
sar\t%0,%1
stam\t%1,%1,%S0
lam\t%0,%0,%S1"
- [(set_attr "op_type" "RI,RR,RX,RX,RR,RX,RX,RRE,RRE,RS,RS")
- (set_attr "type" "*,lr,load,store,floadsf,floadsf,fstoresf,*,*,*,*")
- (set_attr "z10prop" "z10_fwd_A1,
- z10_fr_E1,
- z10_fwd_A3,
- z10_rec,
- *,
- *,
- *,
- z10_super_E1,
- z10_super,
- *,
- *")
+ [(set_attr "op_type" "RI,RR,RX,RX,RRE,RR,RXE,RX,RX,RRE,RRE,RS,RS")
+ (set_attr "type" "*,lr,load,store,floadsf,floadsf,floadsf,floadsf,fstoresf,*,*,*,*")
+ (set_attr "z10prop" "z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec,*,*,*,*,*,z10_super_E1,
+ z10_super,*,*")
+ (set_attr "cpu_facility" "*,*,*,*,vec,*,vec,*,*,*,*,*,*")
])
(define_peephole2
})
(define_insn "*movhi"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q")
- (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,d,d,R,T,b,Q,v,v,v,d, v,QR")
+ (match_operand:HI 1 "general_operand" " d,n,R,T,b,d,d,d,K,K,v,d,v,QR, v"))]
""
"@
lr\t%0,%1
sth\t%1,%0
sthy\t%1,%0
sthrl\t%1,%0
- mvhhi\t%0,%1"
- [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL")
- (set_attr "type" "lr,*,*,*,larl,store,store,store,*")
- (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10")
+ mvhhi\t%0,%1
+ vleih\t%v0,%h1,0
+ vlr\t%v0,%v1
+ vlvgh\t%v0,%1,0
+ vlgvh\t%0,%v1,0
+ vleh\t%v0,%1,0
+ vsteh\t%v1,%0,0"
+ [(set_attr "op_type" "RR,RI,RX,RXY,RIL,RX,RXY,RIL,SIL,VRI,VRR,VRS,VRS,VRX,VRX")
+ (set_attr "type" "lr,*,*,*,larl,store,store,store,*,*,*,*,*,*,*")
+ (set_attr "cpu_facility" "*,*,*,*,z10,*,*,z10,z10,vec,vec,vec,vec,vec,vec")
(set_attr "z10prop" "z10_fr_E1,
z10_fwd_A1,
z10_super_E1,
z10_rec,
z10_rec,
z10_rec,
- z10_super")])
+ z10_super,*,*,*,*,*,*")])
(define_peephole2
[(set (match_operand:HI 0 "register_operand" "")
})
(define_insn "*movqi"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q")
- (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,d,R,T,Q,S,?Q,v,v,v,d, v,QR")
+ (match_operand:QI 1 "general_operand" " d,n,R,T,d,d,n,n,?Q,K,v,d,v,QR, v"))]
""
"@
lr\t%0,%1
stcy\t%1,%0
mvi\t%S0,%b1
mviy\t%S0,%b1
- #"
- [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS")
- (set_attr "type" "lr,*,*,*,store,store,store,store,*")
+ #
+ vleib\t%v0,%b1,0
+ vlr\t%v0,%v1
+ vlvgb\t%v0,%1,0
+ vlgvb\t%0,%v1,0
+ vleb\t%v0,%1,0
+ vsteb\t%v1,%0,0"
+ [(set_attr "op_type" "RR,RI,RX,RXY,RX,RXY,SI,SIY,SS,VRI,VRR,VRS,VRS,VRX,VRX")
+ (set_attr "type" "lr,*,*,*,store,store,store,store,*,*,*,*,*,*,*")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,*,*,*,vec,vec,vec,vec,vec,vec")
(set_attr "z10prop" "z10_fr_E1,
z10_fwd_A1,
z10_super_E1,
z10_rec,
z10_super,
z10_super,
- *")])
+ *,*,*,*,*,*,*")])
(define_peephole2
[(set (match_operand:QI 0 "nonimmediate_operand" "")
[(set (match_operand:TD_TF 0 "register_operand" "")
(match_operand:TD_TF 1 "memory_operand" ""))]
"TARGET_ZARCH && reload_completed
- && !FP_REG_P (operands[0])
+ && GENERAL_REG_P (operands[0])
&& !s_operand (operands[1], VOIDmode)"
[(set (match_dup 0) (match_dup 1))]
{
(define_insn "*mov<mode>_64dfp"
[(set (match_operand:DD_DF 0 "nonimmediate_operand"
- "=f,f,f,d,f,f,R,T,d,d, d,RT")
+ "=f,f,f,d,f,f,R,T,d,d,d, d,b,RT,v,v,d,v,QR")
(match_operand:DD_DF 1 "general_operand"
- " G,f,d,f,R,T,f,f,G,d,RT, d"))]
+ " G,f,d,f,R,T,f,f,G,d,b,RT,d, d,v,d,v,QR,v"))]
"TARGET_DFP"
"@
lzdr\t%0
stdy\t%1,%0
lghi\t%0,0
lgr\t%0,%1
+ lgrl\t%0,%1
lg\t%0,%1
- stg\t%1,%0"
- [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
+ stgrl\t%1,%0
+ stg\t%1,%0
+ vlr\t%v0,%v1
+ vlvgg\t%v0,%1,0
+ vlgvg\t%0,%v1,0
+ vleg\t%0,%1,0
+ vsteg\t%1,%0,0"
+ [(set_attr "op_type" "RRE,RR,RRE,RRE,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRS,VRS,VRX,VRX")
(set_attr "type" "fsimpdf,floaddf,floaddf,floaddf,floaddf,floaddf,
- fstoredf,fstoredf,*,lr,load,store")
- (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
+ fstoredf,fstoredf,*,lr,load,load,store,store,*,*,*,load,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*,*,*")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec,vec,vec")])
(define_insn "*mov<mode>_64"
- [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d, d,RT")
- (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,RT, d"))]
+ [(set (match_operand:DD_DF 0 "nonimmediate_operand" "=f,f,f,f,R,T,d,d,d, d,b,RT,v,v,QR")
+ (match_operand:DD_DF 1 "general_operand" " G,f,R,T,f,f,G,d,b,RT,d, d,v,QR,v"))]
"TARGET_ZARCH"
"@
lzdr\t%0
stdy\t%1,%0
lghi\t%0,0
lgr\t%0,%1
+ lgrl\t%0,%1
lg\t%0,%1
- stg\t%1,%0"
- [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RXY,RXY")
+ stgrl\t%1,%0
+ stg\t%1,%0
+ vlr\t%v0,%v1
+ vleg\t%v0,%1,0
+ vsteg\t%v1,%0,0"
+ [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RRE,RIL,RXY,RIL,RXY,VRR,VRX,VRX")
(set_attr "type" "fsimpdf,fload<mode>,fload<mode>,fload<mode>,
- fstore<mode>,fstore<mode>,*,lr,load,store")
- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_rec")
- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*")])
+ fstore<mode>,fstore<mode>,*,lr,load,load,store,store,*,load,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,*,*,*")
+ (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,z10,*,z10,*,vec,vec,vec")])
(define_insn "*mov<mode>_31"
[(set (match_operand:DD_DF 0 "nonimmediate_operand"
(define_insn "mov<mode>"
[(set (match_operand:SD_SF 0 "nonimmediate_operand"
- "=f,f,f,f,R,T,d,d,d,d,R,T")
+ "=f,f,f,f,f,f,R,T,d,d,d,d,d,b,R,T,v,v,v,d,v,QR")
(match_operand:SD_SF 1 "general_operand"
- " G,f,R,T,f,f,G,d,R,T,d,d"))]
+ " G,f,f,R,R,T,f,f,G,d,b,R,T,d,d,d,v,G,d,v,QR,v"))]
""
"@
lzer\t%0
+ lder\t%0,%1
ler\t%0,%1
+ lde\t%0,%1
le\t%0,%1
ley\t%0,%1
ste\t%1,%0
stey\t%1,%0
lhi\t%0,0
lr\t%0,%1
+ lrl\t%0,%1
l\t%0,%1
ly\t%0,%1
+ strl\t%1,%0
st\t%1,%0
- sty\t%1,%0"
- [(set_attr "op_type" "RRE,RR,RX,RXY,RX,RXY,RI,RR,RX,RXY,RX,RXY")
- (set_attr "type" "fsimpsf,fload<mode>,fload<mode>,fload<mode>,
- fstore<mode>,fstore<mode>,*,lr,load,load,store,store")
- (set_attr "z10prop" "*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec")
- (set_attr "cpu_facility" "z196,*,*,*,*,*,*,*,*,*,*,*")])
+ sty\t%1,%0
+ vlr\t%v0,%v1
+ vleif\t%v0,0,0
+ vlvgf\t%v0,%1,0
+ vlgvf\t%0,%v1,0
+ vlef\t%v0,%1,0
+ vstef\t%v1,%0,0"
+ [(set_attr "op_type" "RRE,RRE,RR,RXE,RX,RXY,RX,RXY,RI,RR,RIL,RX,RXY,RIL,RX,RXY,VRR,VRI,VRS,VRS,VRX,VRX")
+ (set_attr "type" "fsimpsf,fsimpsf,fload<mode>,fload<mode>,fload<mode>,fload<mode>,
+ fstore<mode>,fstore<mode>,*,lr,load,load,load,store,store,store,*,*,*,*,load,store")
+ (set_attr "z10prop" "*,*,*,*,*,*,*,*,z10_fwd_A1,z10_fr_E1,z10_fr_E1,z10_fwd_A3,z10_fwd_A3,z10_rec,z10_rec,z10_rec,*,*,*,*,*,*")
+ (set_attr "cpu_facility" "z196,vec,*,vec,*,*,*,*,*,*,z10,*,*,z10,*,*,vec,vec,vec,vec,vec,vec")])
;
; movcc instruction pattern
;
(define_expand "strlen<mode>"
+ [(match_operand:P 0 "register_operand" "") ; result
+ (match_operand:BLK 1 "memory_operand" "") ; input string
+ (match_operand:SI 2 "immediate_operand" "") ; search character
+ (match_operand:SI 3 "immediate_operand" "")] ; known alignment
+ ""
+{
+ if (!TARGET_VX || operands[2] != const0_rtx)
+ emit_insn (gen_strlen_srst<mode> (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ s390_expand_vec_strlen (operands[0], operands[1], operands[3]);
+
+ DONE;
+})
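User-visible effect, hedged as an expectation rather than verified output: a
plain strlen call with the vector facility enabled goes through
s390_expand_vec_strlen, while a non-zero search character or a non-VX target
falls back to the strlen_srst expander below.

    #include <string.h>

    /* Expected to be expanded inline via the vector string facility
       (a vfenezb-based loop) when compiled for z13 with vector
       support, instead of the srst loop.  */
    size_t
    my_strlen (const char *s)
    {
      return strlen (s);
    }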
+
+(define_expand "strlen_srst<mode>"
[(set (reg:SI 0) (match_operand:SI 2 "immediate_operand" ""))
(parallel
[(set (match_dup 4)
operands[2] = GEN_INT (S390_TDC_INFINITY);
})
+; This extracts CC into a GPR properly shifted. The actual IPM
+; instruction will be issued by reload. The constraint of operand 1
+; forces reload to use a GPR. So reload will issue a movcc insn for
+; copying CC into a GPR first.
(define_insn_and_split "*cc_to_int"
- [(set (match_operand:SI 0 "register_operand" "=d")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=d")
(unspec:SI [(match_operand 1 "register_operand" "0")]
UNSPEC_CC_TO_INT))]
"operands != NULL"
; addti3 instruction pattern(s).
;
-(define_insn_and_split "addti3"
- [(set (match_operand:TI 0 "register_operand" "=&d")
+(define_expand "addti3"
+ [(parallel
+ [(set (match_operand:TI 0 "register_operand" "")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "general_operand" "") ) )
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_ZARCH"
+{
+ /* For z13 we have vaq which doesn't set CC. */
+ if (TARGET_VX)
+ {
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_PLUS (TImode,
+ copy_to_mode_reg (TImode, operands[1]),
+ copy_to_mode_reg (TImode, operands[2]))));
+ DONE;
+ }
+})
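The effect is easiest to see on 128-bit integer arithmetic; a sketch, with
the mnemonics stated as expectations (vaq here, vsq for the subti3
counterpart below):

    /* With TARGET_VX a TImode addition like this is expected to become
       a single vaq on a vector register instead of the algr/alcgr
       GPR-pair sequence.  */
    __int128
    add128 (__int128 a, __int128 b)
    {
      return a + b;
    }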
+
+(define_insn_and_split "*addti3"
+ [(set (match_operand:TI 0 "register_operand" "=&d")
(plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
- (match_operand:TI 2 "general_operand" "do") ) )
+ (match_operand:TI 2 "general_operand" "do") ) )
(clobber (reg:CC CC_REGNUM))]
"TARGET_ZARCH"
"#"
operands[5] = operand_subword (operands[2], 0, 0, TImode);
operands[6] = operand_subword (operands[0], 1, 0, TImode);
operands[7] = operand_subword (operands[1], 1, 0, TImode);
- operands[8] = operand_subword (operands[2], 1, 0, TImode);")
+ operands[8] = operand_subword (operands[2], 1, 0, TImode);"
+ [(set_attr "op_type" "*")
+ (set_attr "cpu_facility" "*")])
;
; adddi3 instruction pattern(s).
; subti3 instruction pattern(s).
;
-(define_insn_and_split "subti3"
- [(set (match_operand:TI 0 "register_operand" "=&d")
- (minus:TI (match_operand:TI 1 "register_operand" "0")
- (match_operand:TI 2 "general_operand" "do") ) )
+(define_expand "subti3"
+ [(parallel
+ [(set (match_operand:TI 0 "register_operand" "")
+ (minus:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:TI 2 "general_operand" "") ) )
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_ZARCH"
+{
+ /* For z13 we have vsq which doesn't set CC. */
+ if (TARGET_VX)
+ {
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_MINUS (TImode,
+ operands[1],
+ copy_to_mode_reg (TImode, operands[2]))));
+ DONE;
+ }
+})
+
+(define_insn_and_split "*subti3"
+ [(set (match_operand:TI 0 "register_operand" "=&d")
+ (minus:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "general_operand" "do") ) )
(clobber (reg:CC CC_REGNUM))]
"TARGET_ZARCH"
"#"
operands[5] = operand_subword (operands[2], 0, 0, TImode);
operands[6] = operand_subword (operands[0], 1, 0, TImode);
operands[7] = operand_subword (operands[1], 1, 0, TImode);
- operands[8] = operand_subword (operands[2], 1, 0, TImode);")
+ operands[8] = operand_subword (operands[2], 1, 0, TImode);"
+ [(set_attr "op_type" "*")
+ (set_attr "cpu_facility" "*")])
;
; subdi3 instruction pattern(s).
--- /dev/null
+;;- Instruction patterns for the System z vector facility
+;; Copyright (C) 2015 Free Software Foundation, Inc.
+;; Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+; All vector modes supported in a vector register
+(define_mode_iterator V
+ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
+ V2SF V4SF V1DF V2DF])
+(define_mode_iterator VT
+ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
+ V2SF V4SF V1DF V2DF V1TF V1TI TI])
+
+; All vector modes of full vector register size that are directly
+; supported by the hardware.
+; V_HW2 is a duplicate of V_HW so that two iterators can expand
+; independently, e.g. in vcond.
+(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V2DF])
+(define_mode_iterator V_HW2 [V16QI V8HI V4SI V2DI V2DF])
+; Including TI for instructions that support it (va, vn, ...)
+(define_mode_iterator VT_HW [V16QI V8HI V4SI V2DI V2DF V1TI TI])
+
+; All full size integer vector modes supported in a vector register + TImode
+(define_mode_iterator VIT_HW [V16QI V8HI V4SI V2DI V1TI TI])
+(define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI])
+(define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
+(define_mode_iterator VI_HW_HS [V8HI V4SI])
+(define_mode_iterator VI_HW_QH [V16QI V8HI])
+
+; All integer vector modes supported in a vector register + TImode
+(define_mode_iterator VIT [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1TI TI])
+(define_mode_iterator VI [V2QI V4QI V8QI V16QI V2HI V4HI V8HI V2SI V4SI V2DI])
+(define_mode_iterator VI_QHS [V4QI V8QI V16QI V4HI V8HI V4SI])
+
+(define_mode_iterator V_8 [V1QI])
+(define_mode_iterator V_16 [V2QI V1HI])
+(define_mode_iterator V_32 [V4QI V2HI V1SI V1SF])
+(define_mode_iterator V_64 [V8QI V4HI V2SI V2SF V1DI V1DF])
+(define_mode_iterator V_128 [V16QI V8HI V4SI V4SF V2DI V2DF V1TI V1TF])
+
+; A blank for vector modes and a * for TImode. This is used to hide
+; the TImode expander name in case it is defined already. See addti3
+; for an example.
+(define_mode_attr ti* [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "")
+ (V1HI "") (V2HI "") (V4HI "") (V8HI "")
+ (V1SI "") (V2SI "") (V4SI "")
+ (V1DI "") (V2DI "")
+ (V1TI "*") (TI "*")])
+
+; The element type of the vector.
+(define_mode_attr non_vec [(V1QI "QI") (V2QI "QI") (V4QI "QI") (V8QI "QI") (V16QI "QI")
+ (V1HI "HI") (V2HI "HI") (V4HI "HI") (V8HI "HI")
+ (V1SI "SI") (V2SI "SI") (V4SI "SI")
+ (V1DI "DI") (V2DI "DI")
+ (V1TI "TI")
+ (V1SF "SF") (V2SF "SF") (V4SF "SF")
+ (V1DF "DF") (V2DF "DF")
+ (V1TF "TF")])
+
+; The instruction suffix
+(define_mode_attr bhfgq [(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
+ (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h")
+ (V1SI "f") (V2SI "f") (V4SI "f")
+ (V1DI "g") (V2DI "g")
+ (V1TI "q") (TI "q")
+ (V1SF "f") (V2SF "f") (V4SF "f")
+ (V1DF "g") (V2DF "g")
+ (V1TF "q")])
+
+; This is for vmalhw. It gets a 'w' attached to avoid confusion with
+; multiply and add logical high vmalh.
+(define_mode_attr w [(V1QI "") (V2QI "") (V4QI "") (V8QI "") (V16QI "")
+ (V1HI "w") (V2HI "w") (V4HI "w") (V8HI "w")
+ (V1SI "") (V2SI "") (V4SI "")
+ (V1DI "") (V2DI "")])
+
+; Resulting mode of a vector comparison. For floating point modes an
+; integer vector mode with the same element size is picked.
+(define_mode_attr tointvec [(V1QI "V1QI") (V2QI "V2QI") (V4QI "V4QI") (V8QI "V8QI") (V16QI "V16QI")
+ (V1HI "V1HI") (V2HI "V2HI") (V4HI "V4HI") (V8HI "V8HI")
+ (V1SI "V1SI") (V2SI "V2SI") (V4SI "V4SI")
+ (V1DI "V1DI") (V2DI "V2DI")
+ (V1TI "V1TI")
+ (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI")
+ (V1DF "V1DI") (V2DF "V2DI")
+ (V1TF "V1TI")])
+
+; Vector with doubled element size.
+(define_mode_attr vec_double [(V2QI "V1HI") (V4QI "V2HI") (V8QI "V4HI") (V16QI "V8HI")
+ (V2HI "V1SI") (V4HI "V2SI") (V8HI "V4SI")
+ (V2SI "V1DI") (V4SI "V2DI")
+ (V2DI "V1TI")
+ (V2SF "V1DF") (V4SF "V2DF")])
+
+; Vector with half the element size.
+(define_mode_attr vec_half [(V1HI "V2QI") (V2HI "V4QI") (V4HI "V8QI") (V8HI "V16QI")
+ (V1SI "V2HI") (V2SI "V4HI") (V4SI "V8HI")
+ (V1DI "V2SI") (V2DI "V4SI")
+ (V1TI "V2DI")
+ (V1DF "V2SF") (V2DF "V4SF")
+ (V1TF "V1DF")])
+
+; The comparison patterns that do not set CC iterate over the rtx code.
+(define_code_iterator VFCMP_HW_OP [eq gt ge])
+(define_code_attr asm_fcmp_op [(eq "e") (gt "h") (ge "he")])
+
+
+
+; Comparison operators on int and fp compares which are directly
+; supported by the HW.
+(define_code_iterator VICMP_HW_OP [eq gt gtu])
+; For int insn_cmp_op can be used in the insn name as well as in the asm output.
+(define_code_attr insn_cmp_op [(eq "eq") (gt "h") (gtu "hl") (ge "he")])
+
+; Flags for vector string instructions (vfae all 4, vfee only ZS and CS, vstrc all 4)
+(define_constants
+ [(VSTRING_FLAG_IN 8) ; invert result
+ (VSTRING_FLAG_RT 4) ; result type
+ (VSTRING_FLAG_ZS 2) ; zero search
+ (VSTRING_FLAG_CS 1)]) ; condition code set
+
+; Full HW vector size moves
+(define_insn "mov<mode>"
+ [(set (match_operand:V_128 0 "nonimmediate_operand" "=v, v,QR, v, v, v, v,v,d")
+ (match_operand:V_128 1 "general_operand" " v,QR, v,j00,jm1,jyy,jxx,d,v"))]
+ "TARGET_VX"
+ "@
+ vlr\t%v0,%v1
+ vl\t%v0,%1
+ vst\t%v1,%0
+ vzero\t%v0
+ vone\t%v0
+ vgbm\t%v0,%t1
+ vgm<bhfgq>\t%v0,%s1,%e1
+ vlvgp\t%v0,%1,%N1
+ #"
+ [(set_attr "op_type" "VRR,VRX,VRX,VRI,VRI,VRI,VRI,VRR,*")])
+
+(define_split
+ [(set (match_operand:V_128 0 "register_operand" "")
+ (match_operand:V_128 1 "register_operand" ""))]
+ "TARGET_VX && GENERAL_REG_P (operands[0]) && VECTOR_REG_P (operands[1])"
+ [(set (match_dup 2)
+ (unspec:DI [(subreg:V2DI (match_dup 1) 0)
+ (const_int 0)] UNSPEC_VEC_EXTRACT))
+ (set (match_dup 3)
+ (unspec:DI [(subreg:V2DI (match_dup 1) 0)
+ (const_int 1)] UNSPEC_VEC_EXTRACT))]
+{
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
+ operands[3] = operand_subword (operands[0], 1, 0, <MODE>mode);
+})
+
+; Moves for smaller vector modes.
+
+; In these patterns only the vlr, vone, and vzero instructions write
+; VR bytes outside the mode. This should be ok since we disallow
+; formerly bigger modes being accessed with smaller modes via
+; subreg. Note: The vone, vzero instructions could easily be replaced
+; with vlei which would only access the bytes belonging to the mode.
+; However, this would probably be slower.
+
+(define_insn "mov<mode>"
+ [(set (match_operand:V_8 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, S, Q, S, d, d,d,d,d,R,T")
+ (match_operand:V_8 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,j00,jm1,jm1,j00,jm1,R,T,b,d,d"))]
+ ""
+ "@
+ vlr\t%v0,%v1
+ vlvgb\t%v0,%1,0
+ vlgvb\t%0,%v1,0
+ vleb\t%v0,%1,0
+ vsteb\t%v1,%0,0
+ vzero\t%v0
+ vone\t%v0
+ vgbm\t%v0,%t1
+ vgm\t%v0,%s1,%e1
+ lr\t%0,%1
+ mvi\t%0,0
+ mviy\t%0,0
+ mvi\t%0,-1
+ mviy\t%0,-1
+ lhi\t%0,0
+ lhi\t%0,-1
+ lh\t%0,%1
+ lhy\t%0,%1
+ lhrl\t%0,%1
+ stc\t%1,%0
+ stcy\t%1,%0"
+ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SI,SIY,SI,SIY,RI,RI,RX,RXY,RIL,RX,RXY")])
+
+(define_insn "mov<mode>"
+ [(set (match_operand:V_16 0 "nonimmediate_operand" "=v,v,d, v,QR, v, v, v, v,d, Q, Q, d, d,d,d,d,R,T,b")
+ (match_operand:V_16 1 "general_operand" " v,d,v,QR, v,j00,jm1,jyy,jxx,d,j00,jm1,j00,jm1,R,T,b,d,d,d"))]
+ ""
+ "@
+ vlr\t%v0,%v1
+ vlvgh\t%v0,%1,0
+ vlgvh\t%0,%v1,0
+ vleh\t%v0,%1,0
+ vsteh\t%v1,%0,0
+ vzero\t%v0
+ vone\t%v0
+ vgbm\t%v0,%t1
+ vgm\t%v0,%s1,%e1
+ lr\t%0,%1
+ mvhhi\t%0,0
+ mvhhi\t%0,-1
+ lhi\t%0,0
+ lhi\t%0,-1
+ lh\t%0,%1
+ lhy\t%0,%1
+ lhrl\t%0,%1
+ sth\t%1,%0
+ sthy\t%1,%0
+ sthrl\t%1,%0"
+ [(set_attr "op_type" "VRR,VRS,VRS,VRX,VRX,VRI,VRI,VRI,VRI,RR,SIL,SIL,RI,RI,RX,RXY,RIL,RX,RXY,RIL")])
+
+(define_insn "mov<mode>"
+ [(set (match_operand:V_32 0 "nonimmediate_operand" "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,d,d,d,d,R,T,b")
+ (match_operand:V_32 1 "general_operand" " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,b,d,R,T,d,d,d"))]
+ "TARGET_VX"
+ "@
+ lder\t%v0,%v1
+ lde\t%0,%1
+ ley\t%0,%1
+ ste\t%1,%0
+ stey\t%1,%0
+ vlr\t%v0,%v1
+ vlvgf\t%v0,%1,0
+ vlgvf\t%0,%v1,0
+ vlef\t%v0,%1,0
+ vstef\t%1,%0,0
+ lzer\t%v0
+ vzero\t%v0
+ vone\t%v0
+ vgbm\t%v0,%t1
+ vgm\t%v0,%s1,%e1
+ mvhi\t%0,0
+ mvhi\t%0,-1
+ lhi\t%0,0
+ lhi\t%0,-1
+ lrl\t%0,%1
+ lr\t%0,%1
+ l\t%0,%1
+ ly\t%0,%1
+ st\t%1,%0
+ sty\t%1,%0
+ strl\t%1,%0"
+ [(set_attr "op_type" "RRE,RXE,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,SIL,SIL,RI,RI,
+ RIL,RR,RX,RXY,RX,RXY,RIL")])
+
+(define_insn "mov<mode>"
+ [(set (match_operand:V_64 0 "nonimmediate_operand"
+ "=f,f,f,R,T,v,v,d, v,QR, f, v, v, v, v, Q, Q, d, d,f,d,d,d, d,RT,b")
+ (match_operand:V_64 1 "general_operand"
+ " f,R,T,f,f,v,d,v,QR, v,j00,j00,jm1,jyy,jxx,j00,jm1,j00,jm1,d,f,b,d,RT, d,d"))]
+ "TARGET_ZARCH"
+ "@
+ ldr\t%0,%1
+ ld\t%0,%1
+ ldy\t%0,%1
+ std\t%1,%0
+ stdy\t%1,%0
+ vlr\t%v0,%v1
+ vlvgg\t%v0,%1,0
+ vlgvg\t%0,%v1,0
+ vleg\t%v0,%1,0
+ vsteg\t%v1,%0,0
+ lzdr\t%0
+ vzero\t%v0
+ vone\t%v0
+ vgbm\t%v0,%t1
+ vgm\t%v0,%s1,%e1
+ mvghi\t%0,0
+ mvghi\t%0,-1
+ lghi\t%0,0
+ lghi\t%0,-1
+ ldgr\t%0,%1
+ lgdr\t%0,%1
+ lgrl\t%0,%1
+ lgr\t%0,%1
+ lg\t%0,%1
+ stg\t%1,%0
+ stgrl\t%1,%0"
+ [(set_attr "op_type" "RRE,RX,RXY,RX,RXY,VRR,VRS,VRS,VRX,VRX,RRE,VRI,VRI,VRI,VRI,
+ SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
+
+
+; vec_load_lanes?
+
+; vec_store_lanes?
+
+; FIXME: Also support vector mode operands for operand 1
+; FIXME: A target memory operand seems to be useful; otherwise we end
+; up with vl + vlvgg + vst. Shouldn't the middle-end be able to handle
+; that itself?
+(define_insn "*vec_set<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v, v,v")
+ (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d,QR,K")
+ (match_operand:DI 2 "shift_count_or_setmem_operand" "Y, I,I")
+ (match_operand:V 3 "register_operand" "0, 0,0")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX"
+ "@
+ vlvg<bhfgq>\t%v0,%1,%Y2
+ vle<bhfgq>\t%v0,%1,%2
+ vlei<bhfgq>\t%v0,%1,%2"
+ [(set_attr "op_type" "VRS,VRX,VRI")])
+
+; vec_set is supposed to *modify* an existing vector so operand 0 is
+; duplicated as input operand.
+(define_expand "vec_set<mode>"
+ [(set (match_operand:V 0 "register_operand" "")
+ (unspec:V [(match_operand:<non_vec> 1 "general_operand" "")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "")
+ (match_dup 0)]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX")
+
+; FIXME: Also support vector mode operands for operand 0
+; FIXME: This should be (vec_select ...) or something, but that only
+; allows constant selectors :(
+; This is used via the RTL standard name as well as for expanding the builtin
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,QR")
+ (unspec:<non_vec> [(match_operand:V 1 "register_operand" " v, v")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" " Y, I")]
+ UNSPEC_VEC_EXTRACT))]
+ "TARGET_VX"
+ "@
+ vlgv<bhfgq>\t%0,%v1,%Y2
+ vste<bhfgq>\t%v1,%0,%2"
+ [(set_attr "op_type" "VRS,VRX")])
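Typical source reaching vec_set/vec_extract, via GNU C vector subscripting;
the chosen alternatives are assumptions for illustration:

    typedef int v4si __attribute__ ((vector_size (16)));

    /* The element read is expected to map to vlgvf and the insert to
       vlvgf, using the patterns above.  */
    int  get2 (v4si v)        { return v[2]; }
    v4si set2 (v4si v, int x) { v[2] = x; return v; }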
+
+(define_expand "vec_init<V_HW:mode>"
+ [(match_operand:V_HW 0 "register_operand" "")
+ (match_operand:V_HW 1 "nonmemory_operand" "")]
+ "TARGET_VX"
+{
+ s390_expand_vec_init (operands[0], operands[1]);
+ DONE;
+})
+
+; Replicate from vector element
+(define_insn "*vec_splat<mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "=v")
+ (vec_duplicate:V_HW
+ (vec_select:<non_vec>
+ (match_operand:V_HW 1 "register_operand" "v")
+ (parallel
+ [(match_operand:QI 2 "immediate_operand" "C")]))))]
+ "TARGET_VX"
+ "vrep<bhfgq>\t%v0,%v1,%2"
+ [(set_attr "op_type" "VRI")])
+
+(define_insn "*vec_splats<mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "=v,v,v,v")
+ (vec_duplicate:V_HW (match_operand:<non_vec> 1 "general_operand" "QR,I,v,d")))]
+ "TARGET_VX"
+ "@
+ vlrep<bhfgq>\t%v0,%1
+ vrepi<bhfgq>\t%v0,%1
+ vrep<bhfgq>\t%v0,%v1,0
+ #"
+ [(set_attr "op_type" "VRX,VRI,VRI,*")])
+
+; vec_splats is supposed to replicate op1 into all elements of op0
+; This splitter first sets the rightmost element of op0 to op1 and
+; then does a vec_splat to replicate that element into all other
+; elements.
+(define_split
+ [(set (match_operand:V_HW 0 "register_operand" "")
+ (vec_duplicate:V_HW (match_operand:<non_vec> 1 "register_operand" "")))]
+ "TARGET_VX && GENERAL_REG_P (operands[1])"
+ [(set (match_dup 0)
+ (unspec:V_HW [(match_dup 1) (match_dup 2) (match_dup 0)] UNSPEC_VEC_SET))
+ (set (match_dup 0)
+ (vec_duplicate:V_HW
+ (vec_select:<non_vec>
+ (match_dup 0) (parallel [(match_dup 2)]))))]
+{
+ operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+})
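The GPR source case of vec_splats in user code; a sketch (the expansion into
vlvgb + vrepb follows the splitter above and is an expectation, not verified
output):

    typedef signed char v16qi __attribute__ ((vector_size (16)));

    /* Replicate a scalar into all 16 byte elements: the splitter first
       inserts x into the last element, then vrep replicates it.  */
    v16qi
    splat (signed char x)
    {
      return (v16qi) { x, x, x, x, x, x, x, x,
                       x, x, x, x, x, x, x, x };
    }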
+
+(define_expand "vcond<V_HW:mode><V_HW2:mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "")
+ (if_then_else:V_HW
+ (match_operator 3 "comparison_operator"
+ [(match_operand:V_HW2 4 "register_operand" "")
+ (match_operand:V_HW2 5 "register_operand" "")])
+ (match_operand:V_HW 1 "nonmemory_operand" "")
+ (match_operand:V_HW 2 "nonmemory_operand" "")))]
+ "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
+{
+ s390_expand_vcond (operands[0], operands[1], operands[2],
+ GET_CODE (operands[3]), operands[4], operands[5]);
+ DONE;
+})
+
+(define_expand "vcondu<V_HW:mode><V_HW2:mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "")
+ (if_then_else:V_HW
+ (match_operator 3 "comparison_operator"
+ [(match_operand:V_HW2 4 "register_operand" "")
+ (match_operand:V_HW2 5 "register_operand" "")])
+ (match_operand:V_HW 1 "nonmemory_operand" "")
+ (match_operand:V_HW 2 "nonmemory_operand" "")))]
+ "TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
+{
+ s390_expand_vcond (operands[0], operands[1], operands[2],
+ GET_CODE (operands[3]), operands[4], operands[5]);
+ DONE;
+})
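A loop shape the vectorizer is expected to route through these expanders
(instruction choice hedged; s390_expand_vcond emits the compare and the
select):

    /* Element-wise maximum; expected to become a vch compare plus a
       vsel-style select per vector.  */
    void
    vmax (int *restrict r, const int *restrict a,
          const int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        r[i] = a[i] > b[i] ? a[i] : b[i];
    }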
+
+; We only have HW support for byte vectors. The middle-end is
+; supposed to lower the mode if required.
+(define_insn "vec_permv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")]
+ UNSPEC_VEC_PERM))]
+ "TARGET_VX"
+ "vperm\t%v0,%v1,%v2,%v3"
+ [(set_attr "op_type" "VRR")])
+
+; vec_perm_const for V2DI using vpdi?
+
+;;
+;; Vector integer arithmetic instructions
+;;
+
+; vab, vah, vaf, vag, vaq
+
+; We use nonimmediate_operand instead of register_operand since it is
+; better to have the reloads into VRs instead of splitting the
+; operation into two DImode ADDs.
+(define_insn "<ti*>add<mode>3"
+ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v")
+ (plus:VIT (match_operand:VIT 1 "nonimmediate_operand" "v")
+ (match_operand:VIT 2 "nonimmediate_operand" "v")))]
+ "TARGET_VX"
+ "va<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vsb, vsh, vsf, vsg, vsq
+(define_insn "<ti*>sub<mode>3"
+ [(set (match_operand:VIT 0 "nonimmediate_operand" "=v")
+ (minus:VIT (match_operand:VIT 1 "nonimmediate_operand" "v")
+ (match_operand:VIT 2 "nonimmediate_operand" "v")))]
+ "TARGET_VX"
+ "vs<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmlb, vmlhw, vmlf
+(define_insn "mul<mode>3"
+ [(set (match_operand:VI_QHS 0 "register_operand" "=v")
+ (mult:VI_QHS (match_operand:VI_QHS 1 "register_operand" "v")
+ (match_operand:VI_QHS 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vml<bhfgq><w>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vlcb, vlch, vlcf, vlcg
+(define_insn "neg<mode>2"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (neg:VI (match_operand:VI 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vlc<bhfgq>\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+; vlpb, vlph, vlpf, vlpg
+(define_insn "abs<mode>2"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (abs:VI (match_operand:VI 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vlp<bhfgq>\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+
+; Vector sum across
+
+; Sum across the DImode parts of the 1st operand and add the rightmost
+; element of the 2nd operand
+; vsumgh, vsumgf
+(define_insn "*vec_sum2<mode>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:VI_HW_HS 1 "register_operand" "v")
+ (match_operand:VI_HW_HS 2 "register_operand" "v")]
+ UNSPEC_VEC_VSUMG))]
+ "TARGET_VX"
+ "vsumg<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vsumb, vsumh
+(define_insn "*vec_sum4<mode>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand" "v")
+ (match_operand:VI_HW_QH 2 "register_operand" "v")]
+ UNSPEC_VEC_VSUM))]
+ "TARGET_VX"
+ "vsum<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+;;
+;; Vector bit instructions (int + fp)
+;;
+
+; Vector and
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VT 0 "register_operand" "=v")
+ (and:VT (match_operand:VT 1 "register_operand" "v")
+ (match_operand:VT 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vn\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+
+; Vector or
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VT 0 "register_operand" "=v")
+ (ior:VT (match_operand:VT 1 "register_operand" "v")
+ (match_operand:VT 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vo\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+
+; Vector xor
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VT 0 "register_operand" "=v")
+ (xor:VT (match_operand:VT 1 "register_operand" "v")
+ (match_operand:VT 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vx\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+
+; Bitwise inversion of a vector - used for vec_cmpne
+(define_insn "*not<mode>"
+ [(set (match_operand:VT 0 "register_operand" "=v")
+ (not:VT (match_operand:VT 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vnot\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+; Vector population count
+
+(define_insn "popcountv16qi2"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_POPCNT))]
+ "TARGET_VX"
+ "vpopct\t%v0,%v1,0"
+ [(set_attr "op_type" "VRR")])
+
+; vpopct only counts bits in byte elements. Bigger element sizes need
+; to be emulated. Word and doubleword elements can use the sum across
+; instructions. For halfword sized elements we add a one-byte-shifted
+; copy of the result to the result and then mask out the even-indexed
+; bytes, leaving the halfword bit count in each element.
+
+(define_expand "popcountv8hi2"
+ [(set (match_dup 2)
+ (unspec:V16QI [(subreg:V16QI (match_operand:V8HI 1 "register_operand" "v") 0)]
+ UNSPEC_POPCNT))
+ ; Make a copy of the result
+ (set (match_dup 3) (match_dup 2))
+ ; Generate the shift count operand in a VR (value 8 into byte element 7)
+ (set (match_dup 4) (match_dup 5))
+ (set (match_dup 4) (unspec:V16QI [(const_int 8)
+ (const_int 7)
+ (match_dup 4)] UNSPEC_VEC_SET))
+ ; Vector shift right logical by one byte
+ (set (match_dup 3)
+ (unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
+ ; Add the shifted and the original result
+ (set (match_dup 2)
+ (plus:V16QI (match_dup 2) (match_dup 3)))
+ ; Generate mask for the odd numbered byte elements
+ (set (match_dup 3)
+ (const_vector:V16QI [(const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)
+ (const_int 0) (const_int 255)]))
+ ; Zero out the even indexed bytes
+ (set (match_operand:V8HI 0 "register_operand" "=v")
+ (and:V8HI (subreg:V8HI (match_dup 2) 0)
+ (subreg:V8HI (match_dup 3) 0)))
+]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V16QImode);
+ operands[3] = gen_reg_rtx (V16QImode);
+ operands[4] = gen_reg_rtx (V16QImode);
+ operands[5] = CONST0_RTX (V16QImode);
+})
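A scalar model of the halfword emulation above, for one V8HI element
(a sketch; it mirrors the vpopct, vsrlb, va and vn steps):

    /* Per-byte popcounts, add a one-byte-shifted copy, then keep only
       the odd-indexed byte, which now holds the halfword bit count.  */
    unsigned short
    popcount_hi (unsigned short x)
    {
      unsigned int c = ((unsigned int) __builtin_popcount ((x >> 8) & 0xff) << 8)
                       | (unsigned int) __builtin_popcount (x & 0xff);
      c += c >> 8;                            /* shifted copy + original */
      return (unsigned short) (c & 0x00ffu);  /* mask the even-indexed byte */
    }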
+
+(define_expand "popcountv4si2"
+ [(set (match_dup 2)
+ (unspec:V16QI [(subreg:V16QI (match_operand:V4SI 1 "register_operand" "v") 0)]
+ UNSPEC_POPCNT))
+ (set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_dup 2) (match_dup 3)]
+ UNSPEC_VEC_VSUM))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V16QImode);
+ operands[3] = force_reg (V16QImode, CONST0_RTX (V16QImode));
+})
+
+(define_expand "popcountv2di2"
+ [(set (match_dup 2)
+ (unspec:V16QI [(subreg:V16QI (match_operand:V2DI 1 "register_operand" "v") 0)]
+ UNSPEC_POPCNT))
+ (set (match_dup 3)
+ (unspec:V4SI [(match_dup 2) (match_dup 4)]
+ UNSPEC_VEC_VSUM))
+ (set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_dup 3) (match_dup 5)]
+ UNSPEC_VEC_VSUMG))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V16QImode);
+ operands[3] = gen_reg_rtx (V4SImode);
+ operands[4] = force_reg (V16QImode, CONST0_RTX (V16QImode));
+ operands[5] = force_reg (V4SImode, CONST0_RTX (V4SImode));
+})
+
+; Count leading zeros
+(define_insn "clz<mode>2"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (clz:V (match_operand:V 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vclz<bhfgq>\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+; Count trailing zeros
+(define_insn "ctz<mode>2"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (ctz:V (match_operand:V 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vctz<bhfgq>\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+
+; Vector rotate instructions
+
+; Each vector element rotated by a scalar
+; verllb, verllh, verllf, verllg
+(define_insn "rotl<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (rotate:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "TARGET_VX"
+ "verll<bhfgq>\t%v0,%v1,%Y2"
+ [(set_attr "op_type" "VRS")])
+
+; Each vector element rotated by the corresponding vector element
+; verllvb, verllvh, verllvf, verllvg
+(define_insn "vrotl<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (rotate:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "verllv<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+
+; Shift each element by scalar value
+
+; veslb, veslh, veslf, veslg
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashift:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "TARGET_VX"
+ "vesl<bhfgq>\t%v0,%v1,%Y2"
+ [(set_attr "op_type" "VRS")])
+
+; vesrab, vesrah, vesraf, vesrag
+(define_insn "ashr<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "TARGET_VX"
+ "vesra<bhfgq>\t%v0,%v1,%Y2"
+ [(set_attr "op_type" "VRS")])
+
+; vesrlb, vesrlh, vesrlf, vesrlg
+(define_insn "lshr<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:SI 2 "shift_count_or_setmem_operand" "Y")))]
+ "TARGET_VX"
+ "vesrl<bhfgq>\t%v0,%v1,%Y2"
+ [(set_attr "op_type" "VRS")])
+
+
+; Shift each element by corresponding vector element
+
+; veslvb, veslvh, veslvf, veslvg
+(define_insn "vashl<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashift:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "veslv<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vesravb, vesravh, vesravf, vesravg
+(define_insn "vashr<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vesrav<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vesrlvb, vesrlvh, vesrlvf, vesrlvg
+(define_insn "vlshr<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vesrlv<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; Vector shift right logical by byte
+
+; Pattern used by e.g. popcount
+(define_insn "*vec_srb<mode>"
+ [(set (match_operand:V_HW 0 "register_operand" "=v")
+ (unspec:V_HW [(match_operand:V_HW 1 "register_operand" "v")
+ (match_operand:<tointvec> 2 "register_operand" "v")]
+ UNSPEC_VEC_SRLB))]
+ "TARGET_VX"
+ "vsrlb\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+
+; vmnb, vmnh, vmnf, vmng
+(define_insn "smin<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (smin:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vmn<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmxb, vmxh, vmxf, vmxg
+(define_insn "smax<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (smax:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vmx<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmnlb, vmnlh, vmnlf, vmnlg
+(define_insn "umin<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (umin:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vmnl<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmxlb, vmxlh, vmxlf, vmxlg
+(define_insn "umax<mode>3"
+ [(set (match_operand:VI 0 "register_operand" "=v")
+ (umax:VI (match_operand:VI 1 "register_operand" "v")
+ (match_operand:VI 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vmxl<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmeb, vmeh, vmef
+(define_insn "vec_widen_smult_even_<mode>"
+ [(set (match_operand:<vec_double> 0 "register_operand" "=v")
+ (unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "v")
+ (match_operand:VI_QHS 2 "register_operand" "v")]
+ UNSPEC_VEC_SMULT_EVEN))]
+ "TARGET_VX"
+ "vme<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmleb, vmleh, vmlef
+(define_insn "vec_widen_umult_even_<mode>"
+ [(set (match_operand:<vec_double> 0 "register_operand" "=v")
+ (unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "v")
+ (match_operand:VI_QHS 2 "register_operand" "v")]
+ UNSPEC_VEC_UMULT_EVEN))]
+ "TARGET_VX"
+ "vmle<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmob, vmoh, vmof
+(define_insn "vec_widen_smult_odd_<mode>"
+ [(set (match_operand:<vec_double> 0 "register_operand" "=v")
+ (unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "v")
+ (match_operand:VI_QHS 2 "register_operand" "v")]
+ UNSPEC_VEC_SMULT_ODD))]
+ "TARGET_VX"
+ "vmo<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vmlob, vmloh, vmlof
+(define_insn "vec_widen_umult_odd_<mode>"
+ [(set (match_operand:<vec_double> 0 "register_operand" "=v")
+ (unspec:<vec_double> [(match_operand:VI_QHS 1 "register_operand" "v")
+ (match_operand:VI_QHS 2 "register_operand" "v")]
+ UNSPEC_VEC_UMULT_ODD))]
+ "TARGET_VX"
+ "vmlo<bhfgq>\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; vec_widen_umult_hi
+; vec_widen_umult_lo
+; vec_widen_smult_hi
+; vec_widen_smult_lo
+
+; vec_widen_ushiftl_hi
+; vec_widen_ushiftl_lo
+; vec_widen_sshiftl_hi
+; vec_widen_sshiftl_lo
+
+;;
+;; Vector floating point arithmetic instructions
+;;
+
+(define_insn "addv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (plus:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfadb\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "subv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (minus:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfsdb\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "mulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (mult:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfmdb\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfddb\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfsqdb\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "fmav2df4"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (fma:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")
+ (match_operand:V2DF 3 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfmadb\t%v0,%v1,%v2,%v3"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "fmsv2df4"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (fma:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")
+ (neg:V2DF (match_operand:V2DF 3 "register_operand" "v"))))]
+ "TARGET_VX"
+ "vfmsdb\t%v0,%v1,%v2,%v3"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "negv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (neg:V2DF (match_operand:V2DF 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vflcdb\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "absv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (abs:V2DF (match_operand:V2DF 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vflpdb\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "*negabsv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (neg:V2DF (abs:V2DF (match_operand:V2DF 1 "register_operand" "v"))))]
+ "TARGET_VX"
+ "vflndb\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+; Emulate with compare + select
+(define_insn_and_split "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (gt:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:V2DF
+ (eq (match_dup 3) (match_dup 4))
+ (match_dup 2)
+ (match_dup 1)))]
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = CONST0_RTX (V2DImode);
+})
+
+; Emulate with compare + select
+(define_insn_and_split "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "#"
+ ""
+ [(set (match_dup 3)
+ (gt:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0)
+ (if_then_else:V2DF
+ (eq (match_dup 3) (match_dup 4))
+ (match_dup 1)
+ (match_dup 2)))]
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = CONST0_RTX (V2DImode);
+})
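+
+; Both emulations implement the C-level semantics
+;   smax (a, b) = a > b ? a : b
+;   smin (a, b) = a > b ? b : a
+; The split yields a vfchdb compare, leaving an all-ones/all-zeros
+; element mask in a V2DI register, followed by a vsel matched by the
+; *vec_sel0<mode> pattern below.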
+
+
+;;
+;; Integer compares
+;;
+
+(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode>_nocc"
+ [(set (match_operand:VI 2 "register_operand" "=v")
+ (VICMP_HW_OP:VI (match_operand:VI 0 "register_operand" "v")
+ (match_operand:VI 1 "register_operand" "v")))]
+ "TARGET_VX"
+ "vc<VICMP_HW_OP:insn_cmp_op><VI:bhfgq>\t%v2,%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+
+;;
+;; Floating point compares
+;;
+
+; EQ, GT, GE
+(define_insn "*vec_cmp<VFCMP_HW_OP:code>v2df_nocc"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (VFCMP_HW_OP:V2DI (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vfc<VFCMP_HW_OP:asm_fcmp_op>db\t%v0,%v1,%v2"
+ [(set_attr "op_type" "VRR")])
+
+; Expanders for comparisons not directly supported by the hardware
+
+; UNEQ a u== b -> !(a > b | b > a)
+(define_expand "vec_cmpuneqv2df"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))
+ (set (match_dup 3)
+ (gt:V2DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))
+ (set (match_dup 0) (not:V2DI (match_dup 0)))]
+ "TARGET_VX"
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+})
+
+; LTGT a <> b -> a > b | b > a
+(define_expand "vec_cmpltgtv2df"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (gt:V2DI (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))
+ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))]
+ "TARGET_VX"
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+})
+
+; ORDERED (a, b): a >= b | b > a
+(define_expand "vec_orderedv2df"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))
+ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))]
+ "TARGET_VX"
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+})
+
+; UNORDERED (a, b): !ORDERED (a, b)
+(define_expand "vec_unorderedv2df"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (ge:V2DI (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "register_operand" "v")))
+ (set (match_dup 3) (gt:V2DI (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (ior:V2DI (match_dup 0) (match_dup 3)))
+ (set (match_dup 0) (not:V2DI (match_dup 0)))]
+ "TARGET_VX"
+{
+ operands[3] = gen_reg_rtx (V2DImode);
+})
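+
+; All four expanders rely on the hardware compares being IEEE
+; conforming: vfchdb (>) and vfchedb (>=) yield false whenever one
+; operand is a NaN.  Hence
+;   LTGT      = (a > b) | (b > a)
+;   UNEQ      = ~LTGT
+;   ORDERED   = (a >= b) | (b > a)
+;   UNORDERED = ~ORDERED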
+
+(define_insn "*vec_load_pairv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (vec_concat:V2DI (match_operand:DI 1 "register_operand" "d")
+ (match_operand:DI 2 "register_operand" "d")))]
+ "TARGET_VX"
+ "vlvgp\t%v0,%1,%2"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "vllv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:SI 1 "register_operand" "d")
+ (match_operand:BLK 2 "memory_operand" "Q")]
+ UNSPEC_VEC_LOAD_LEN))]
+ "TARGET_VX"
+ "vll\t%v0,%1,%2"
+ [(set_attr "op_type" "VRS")])
+
+; vfenebs, vfenehs, vfenefs
+; vfenezbs, vfenezhs, vfenezfs
+(define_insn "vec_vfenes<mode>"
+ [(set (match_operand:VI_HW_QHS 0 "register_operand" "=v")
+ (unspec:VI_HW_QHS [(match_operand:VI_HW_QHS 1 "register_operand" "v")
+ (match_operand:VI_HW_QHS 2 "register_operand" "v")
+ (match_operand:QI 3 "immediate_operand" "C")]
+ UNSPEC_VEC_VFENE))
+ (set (reg:CCRAW CC_REGNUM)
+ (unspec:CCRAW [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_VEC_VFENECC))]
+ "TARGET_VX"
+{
+ unsigned HOST_WIDE_INT flags = INTVAL (operands[3]);
+
+ gcc_assert (!(flags & ~(VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
+ flags &= ~VSTRING_FLAG_CS;
+
+ if (flags == VSTRING_FLAG_ZS)
+ return "vfenez<bhfgq>s\t%v0,%v1,%v2";
+ return "vfene<bhfgq>s\t%v0,%v1,%v2";
+}
+ [(set_attr "op_type" "VRR")])
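+
+; Usage sketch: with VSTRING_FLAG_ZS set this emits vfenez<bhfgq>s,
+; which additionally stops at the first zero element.  That is the
+; building block s390_expand_vec_strlen (s390.c) uses to scan a
+; string in 16-byte chunks, testing CC for the "zero found" case.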
+
+
+; Vector select
+
+; The following splitters simplify vec_sel for constant 0 or -1
+; selection sources. This is required to generate efficient code for
+; vcond.
+
+; a = b == c
+(define_split
+ [(set (match_operand:V 0 "register_operand" "")
+ (if_then_else:V
+ (eq (match_operand:<tointvec> 3 "register_operand" "")
+ (match_operand:V 4 "const0_operand" ""))
+ (match_operand:V 1 "const0_operand" "")
+ (match_operand:V 2 "constm1_operand" "")))]
+ "TARGET_VX"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ PUT_MODE (operands[3], <V:MODE>mode);
+})
+
+; a = ~(b == c)
+(define_split
+ [(set (match_operand:V 0 "register_operand" "")
+ (if_then_else:V
+ (eq (match_operand:<tointvec> 3 "register_operand" "")
+ (match_operand:V 4 "const0_operand" ""))
+ (match_operand:V 1 "constm1_operand" "")
+ (match_operand:V 2 "const0_operand" "")))]
+ "TARGET_VX"
+ [(set (match_dup 0) (not:V (match_dup 3)))]
+{
+ PUT_MODE (operands[3], <V:MODE>mode);
+})
+
+; a = b != c
+(define_split
+ [(set (match_operand:V 0 "register_operand" "")
+ (if_then_else:V
+ (ne (match_operand:<tointvec> 3 "register_operand" "")
+ (match_operand:V 4 "const0_operand" ""))
+ (match_operand:V 1 "constm1_operand" "")
+ (match_operand:V 2 "const0_operand" "")))]
+ "TARGET_VX"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ PUT_MODE (operands[3], <V:MODE>mode);
+})
+
+; a = ~(b != c)
+(define_split
+ [(set (match_operand:V 0 "register_operand" "")
+ (if_then_else:V
+ (ne (match_operand:<tointvec> 3 "register_operand" "")
+ (match_operand:V 4 "const0_operand" ""))
+ (match_operand:V 1 "const0_operand" "")
+ (match_operand:V 2 "constm1_operand" "")))]
+ "TARGET_VX"
+ [(set (match_dup 0) (not:V (match_dup 3)))]
+{
+ PUT_MODE (operands[3], <V:MODE>mode);
+})
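+
+; The splits above are correct because a vector comparison sets each
+; element to all ones or all zeros (VECTOR_STORE_FLAG_VALUE is -1,
+; see s390.h).  Selecting between the constants 0 and -1 under such
+; an element mask therefore yields either the mask itself or its
+; complement, so the vsel collapses into a plain copy or a vnot.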
+
+; op0 = op3 == 0 ? op1 : op2
+(define_insn "*vec_sel0<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (if_then_else:V
+ (eq (match_operand:<tointvec> 3 "register_operand" "v")
+ (match_operand:<tointvec> 4 "const0_operand" ""))
+ (match_operand:V 1 "register_operand" "v")
+ (match_operand:V 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vsel\t%v0,%2,%1,%3"
+ [(set_attr "op_type" "VRR")])
+
+; op0 = ~op3 == 0 ? op1 : op2
+(define_insn "*vec_sel0<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (if_then_else:V
+ (eq (not:<tointvec> (match_operand:<tointvec> 3 "register_operand" "v"))
+ (match_operand:<tointvec> 4 "const0_operand" ""))
+ (match_operand:V 1 "register_operand" "v")
+ (match_operand:V 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vsel\t%v0,%1,%2,%3"
+ [(set_attr "op_type" "VRR")])
+
+; op0 = op3 == -1 ? op1 : op2
+(define_insn "*vec_sel1<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (if_then_else:V
+ (eq (match_operand:<tointvec> 3 "register_operand" "v")
+ (match_operand:<tointvec> 4 "constm1_operand" ""))
+ (match_operand:V 1 "register_operand" "v")
+ (match_operand:V 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vsel\t%v0,%1,%2,%3"
+ [(set_attr "op_type" "VRR")])
+
+; op0 = ~op3 == -1 ? op1 : op2
+(define_insn "*vec_sel1<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (if_then_else:V
+ (eq (not:<tointvec> (match_operand:<tointvec> 3 "register_operand" "v"))
+ (match_operand:<tointvec> 4 "constm1_operand" ""))
+ (match_operand:V 1 "register_operand" "v")
+ (match_operand:V 2 "register_operand" "v")))]
+ "TARGET_VX"
+ "vsel\t%v0,%2,%1,%3"
+ [(set_attr "op_type" "VRR")])
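+
+; In all four patterns the operand order in the output template
+; follows from the bit-wise semantics of vsel: vsel v1,v2,v3,v4
+; takes a bit from v2 where the mask bit in v4 is one and from v3
+; where it is zero.  The == 0 forms and the negated forms therefore
+; simply swap operands 1 and 2 in the template.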
+
+
+
+; reduc_smin
+; reduc_smax
+; reduc_umin
+; reduc_umax
+
+; vec_shl: doable with vrep + vsl
+; vec_shr
+
+; vec_pack_trunc
+; vec_pack_ssat
+; vec_pack_usat
+; vec_pack_sfix_trunc
+; vec_pack_ufix_trunc
+; vec_unpacks_hi
+; vec_unpacks_lo
+; vec_unpacku_hi
+; vec_unpacku_lo
+; vec_unpacks_float_hi
+; vec_unpacks_float_lo
+; vec_unpacku_float_hi
+; vec_unpacku_float_lo