/* Subroutines used for code generation on IBM S/390 and zSeries
- Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 1999-2013 Free Software Foundation, Inc.
Contributed by Hartmut Penner (hpenner@de.ibm.com) and
Ulrich Weigand (uweigand@de.ibm.com) and
Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
COSTS_N_INSNS (160), /* DSGR cracked */
};
+static const
+struct processor_costs zEC12_cost =
+{
+ COSTS_N_INSNS (7), /* M */
+ COSTS_N_INSNS (5), /* MGHI */
+ COSTS_N_INSNS (5), /* MH */
+ COSTS_N_INSNS (5), /* MHI */
+ COSTS_N_INSNS (7), /* ML */
+ COSTS_N_INSNS (7), /* MR */
+ COSTS_N_INSNS (6), /* MS */
+ COSTS_N_INSNS (8), /* MSG */
+ COSTS_N_INSNS (6), /* MSGF */
+ COSTS_N_INSNS (6), /* MSGFR */
+ COSTS_N_INSNS (8), /* MSGR */
+ COSTS_N_INSNS (6), /* MSR */
+ COSTS_N_INSNS (1), /* multiplication in DFmode */
+ COSTS_N_INSNS (40), /* MXBR B+40 */
+ COSTS_N_INSNS (100), /* SQXBR B+100 */
+ COSTS_N_INSNS (42), /* SQDBR B+42 */
+ COSTS_N_INSNS (28), /* SQEBR B+28 */
+ COSTS_N_INSNS (1), /* MADBR B */
+ COSTS_N_INSNS (1), /* MAEBR B */
+ COSTS_N_INSNS (131), /* DXBR B+131 */
+ COSTS_N_INSNS (29), /* DDBR */
+ COSTS_N_INSNS (22), /* DEBR */
+ COSTS_N_INSNS (160), /* DLGR cracked */
+ COSTS_N_INSNS (160), /* DLR cracked */
+ COSTS_N_INSNS (160), /* DR expanded */
+ COSTS_N_INSNS (160), /* DSGFR cracked */
+ COSTS_N_INSNS (160), /* DSGR cracked */
+};
+
extern int reload_completed;
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
/* Bits standing for floating point registers. Set, if the
respective register has to be saved. Starting with reg 16 (f0)
at the rightmost bit.
- Bit 15 - 8 7 6 5 4 3 2 1 0
- fpr 15 - 8 7 5 3 1 6 4 2 0
- reg 31 - 24 23 22 21 20 19 18 17 16 */
+ Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
+ reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
unsigned int fpr_bitmap;
/* Number of floating point registers f8-f15 which must be saved. */
const char *some_ld_name;
bool has_landing_pad_p;
+
+ /* True if the current function may contain a tbegin clobbering
+ FPRs. */
+ bool tbegin_p;
};
/* Few accessor macros for struct cfun->machine->s390_frame_layout. */
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
-#define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
- (1 << (BITNUM)))
-#define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
- (1 << (BITNUM))))
+#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
+ (1 << (REGNO - FPR0_REGNUM)))
+#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
+ (1 << (REGNO - FPR0_REGNUM))))
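+/* Usage illustration for the accessors above: the bit index is simply
+   REGNO - FPR0_REGNUM, so cfun_set_fpr_save (FPR0_REGNUM) sets bit 0
+   of fpr_bitmap and cfun_fpr_save_p (FPR15_REGNUM) tests bit 15,
+   matching the reg -> bit mapping documented in s390_frame_layout.  */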
/* Number of GPRs and FPRs used for argument passing. */
#define GP_ARG_NUM_REG 5
int a, b; if ((b = a + c) > 0)
with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
- && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
+ && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
+ || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
+ /* Avoid INT32_MIN on 32 bit. */
+ && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
{
if (INTVAL (XEXP((op0), 1)) < 0)
return CCANmode;
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
that we can implement more efficiently. */
-void
-s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1)
+static void
+s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
{
+ if (op0_preserve_value)
+ return;
+
/* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
if ((*code == EQ || *code == NE)
&& *op1 == const0_rtx
*op1 = constm1_rtx;
}
- /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
+ /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
- && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
+ && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
&& XVECLEN (*op0, 0) == 1
&& GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
}
}
- /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
+ /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
- && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
+ && XINT (*op0, 1) == UNSPEC_CC_TO_INT
&& XVECLEN (*op0, 0) == 1
- && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
&& REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
- && *op1 == const0_rtx)
+ && CONST_INT_P (*op1))
{
enum rtx_code new_code = UNKNOWN;
- switch (*code)
+ switch (GET_MODE (XVECEXP (*op0, 0, 0)))
{
- case EQ: new_code = EQ; break;
- case NE: new_code = NE; break;
- default: break;
+ case CCZmode:
+ case CCRAWmode:
+ switch (*code)
+ {
+ case EQ: new_code = EQ; break;
+ case NE: new_code = NE; break;
+ default: break;
+ }
+ break;
+ default: break;
}
if (new_code != UNKNOWN)
{
+ /* For CCRAWmode put the required cc mask into the second
+ operand. */
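+	  /* For illustration: a required cc of 0 becomes the mask 8
+	     (0b1000) and a required cc of 3 becomes the mask 1,
+	     i.e. 1 << (3 - cc).  */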
+ if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode)
+ *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
*op0 = XVECEXP (*op0, 0, 0);
*code = new_code;
}
if (MEM_P (*op0) && REG_P (*op1))
{
rtx tem = *op0; *op0 = *op1; *op1 = tem;
- *code = swap_condition (*code);
+ *code = (int)swap_condition ((enum rtx_code)*code);
}
}
conditional branch testing the result. */
static rtx
-s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx)
+s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
+ rtx cmp, rtx new_rtx)
{
- emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx));
- return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx);
+ emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
+ return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
+ const0_rtx);
}
-/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
- unconditional jump, else a conditional jump under condition COND. */
+/* Emit a jump instruction to TARGET and return it. If COND is
+ NULL_RTX, emit an unconditional jump, else a conditional jump under
+ condition COND. */
-void
+rtx
s390_emit_jump (rtx target, rtx cond)
{
rtx insn;
target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
- emit_jump_insn (insn);
+ return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
- gcc_assert (XEXP (code, 1) == const0_rtx);
+ gcc_assert (XEXP (code, 1) == const0_rtx
+ || (GET_MODE (XEXP (code, 0)) == CCRAWmode
+ && CONST_INT_P (XEXP (code, 1))));
+
switch (GET_MODE (XEXP (code, 0)))
{
}
break;
+ case CCRAWmode:
+ switch (GET_CODE (code))
+ {
+ case EQ:
+ return INTVAL (XEXP (code, 1));
+ case NE:
+ return (INTVAL (XEXP (code, 1))) ^ 0xf;
+ default:
+ gcc_unreachable ();
+ }
+
default:
return -1;
}
if (GET_CODE (XEXP (code, 0)) == REG
&& REGNO (XEXP (code, 0)) == CC_REGNUM
- && XEXP (code, 1) == const0_rtx)
+ && (XEXP (code, 1) == const0_rtx
+ || (GET_MODE (XEXP (code, 0)) == CCRAWmode
+ && CONST_INT_P (XEXP (code, 1)))))
mask = s390_branch_condition_mask (code);
else
mask = s390_compare_and_branch_condition_mask (code);
return true;
}
+/* Check whether a rotate of ROTL followed by an AND of CONTIG is
+ equivalent to a shift followed by the AND. In particular, CONTIG
+ should not overlap the (rotated) bit 0/bit 63 gap. Negative values
+ for ROTL indicate a rotate to the right. */
+
+bool
+s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
+{
+ int pos, len;
+ bool ok;
+
+ ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
+ gcc_assert (ok);
+
+ return ((rotl >= 0 && rotl <= pos)
+ || (rotl < 0 && -rotl <= bitsize - len - pos));
+}
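+
+/* A rough worked example: for contig = 0xff00 in a 64-bit field the
+   contiguous run is 8 bits long starting at bit 8 (counting from the
+   least significant bit), so any left rotate of up to 8 is equivalent
+   to a shift, while a larger rotate would pull wrapped-around bits
+   into the masked range.  */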
+
/* Check whether we can (and want to) split a double-word
move in mode MODE from SRC to DST into two single-word
moves, moving the subword FIRST_SUBWORD first. */
if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
target_flags |= MASK_HARD_DFP;
+ /* Enable hardware transactions if available and not explicitly
+ disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
+ if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
+ target_flags |= MASK_OPT_HTM;
+
if (TARGET_HARD_DFP && !TARGET_DFP)
{
if (target_flags_explicit & MASK_HARD_DFP)
break;
case PROCESSOR_2097_Z10:
s390_cost = &z10_cost;
+ break;
case PROCESSOR_2817_Z196:
s390_cost = &z196_cost;
break;
+ case PROCESSOR_2827_ZEC12:
+ s390_cost = &zEC12_cost;
+ break;
default:
s390_cost = &z900_cost;
}
#endif
if (s390_tune == PROCESSOR_2097_Z10
- || s390_tune == PROCESSOR_2817_Z196)
+ || s390_tune == PROCESSOR_2817_Z196
+ || s390_tune == PROCESSOR_2827_ZEC12)
{
maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
global_options.x_param_values,
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
global_options.x_param_values,
global_options_set.x_param_values);
+
+ if (TARGET_TPF)
+ {
+ /* Don't emit DWARF3/4 unless specifically selected. The TPF
+ debuggers do not yet support DWARF 3/4. */
+ if (!global_options_set.x_dwarf_strict)
+ dwarf_strict = 1;
+ if (!global_options_set.x_dwarf_version)
+ dwarf_version = 2;
+ }
}
/* Map for smallest class containing reg regno. */
Thus we don't check the displacement for validity here. If after
elimination the displacement turns out to be invalid after all,
this is fixed up by reload in any case. */
- if (base != arg_pointer_rtx
- && indx != arg_pointer_rtx
- && base != return_address_pointer_rtx
- && indx != return_address_pointer_rtx
- && base != frame_pointer_rtx
- && indx != frame_pointer_rtx
- && base != virtual_stack_vars_rtx
- && indx != virtual_stack_vars_rtx)
+ /* LRA always keeps displacements up to date, and we need to
+ know that the displacement is correct during all of LRA, not
+ only at the final elimination. */
+ if (lra_in_progress
+ || (base != arg_pointer_rtx
+ && indx != arg_pointer_rtx
+ && base != return_address_pointer_rtx
+ && indx != return_address_pointer_rtx
+ && base != frame_pointer_rtx
+ && indx != frame_pointer_rtx
+ && base != virtual_stack_vars_rtx
+ && indx != virtual_stack_vars_rtx))
if (!DISP_IN_RANGE (offset))
return false;
}
}
-/* Return true if ADDR is of kind symbol_ref or symbol_ref + const_int
- and return these parts in SYMREF and ADDEND. You can pass NULL in
- SYMREF and/or ADDEND if you are not interested in these values.
- Literal pool references are *not* considered symbol references. */
+/* Return TRUE if ADDR is an operand valid for a load/store relative
+ instruction. Be aware that the alignment of the operand needs to
+ be checked separately.
+ Valid addresses are single references or a sum of a reference and a
+ constant integer. Return these parts in SYMREF and ADDEND. You can
+ pass NULL in SYMREF and/or ADDEND if you are not interested in these
+ values. Literal pool references are *not* considered symbol
+ references. */
static bool
-s390_symref_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
+s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
HOST_WIDE_INT tmpaddend = 0;
if (GET_CODE (addr) == PLUS)
{
- if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
- && !CONSTANT_POOL_ADDRESS_P (XEXP (addr, 0))
- && CONST_INT_P (XEXP (addr, 1)))
- {
- tmpaddend = INTVAL (XEXP (addr, 1));
- addr = XEXP (addr, 0);
- }
- else
+ if (!CONST_INT_P (XEXP (addr, 1)))
return false;
+
+ tmpaddend = INTVAL (XEXP (addr, 1));
+ addr = XEXP (addr, 0);
}
- else
- if (GET_CODE (addr) != SYMBOL_REF || CONSTANT_POOL_ADDRESS_P (addr))
- return false;
- if (symref)
- *symref = addr;
- if (addend)
- *addend = tmpaddend;
+ if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
+ || (GET_CODE (addr) == UNSPEC
+ && (XINT (addr, 1) == UNSPEC_GOTENT
+ || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
+ {
+ if (symref)
+ *symref = addr;
+ if (addend)
+ *addend = tmpaddend;
- return true;
+ return true;
+ }
+ return false;
}
-
/* Return true if the address in OP is valid for constraint letter C
if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
pool MEMs should be accepted. Only the Q, R, S, T constraint
/* This check makes sure that no symbolic address (except literal
pool references) are accepted by the R or T constraints. */
- if (s390_symref_operand_p (op, NULL, NULL))
+ if (s390_loadrelative_operand_p (op, NULL, NULL))
return 0;
/* Ensure literal pool references are only accepted if LIT_POOL_OK. */
s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
-/* On s390, copy between fprs and gprs is expensive. */
- if ((reg_classes_intersect_p (from, GENERAL_REGS)
- && reg_classes_intersect_p (to, FP_REGS))
- || (reg_classes_intersect_p (from, FP_REGS)
- && reg_classes_intersect_p (to, GENERAL_REGS)))
+ /* On s390, a copy between fprs and gprs is expensive unless
+ ldgr/lgdr can be used (z10 and later, 8-byte values only). */
+ if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
+ && ((reg_classes_intersect_p (from, GENERAL_REGS)
+ && reg_classes_intersect_p (to, FP_REGS))
+ || (reg_classes_intersect_p (from, FP_REGS)
+ && reg_classes_intersect_p (to, GENERAL_REGS))))
return 10;
return 1;
/* Return the cost of an address rtx ADDR. */
static int
-s390_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
+ addr_space_t as ATTRIBUTE_UNUSED,
+ bool speed ATTRIBUTE_UNUSED)
{
struct s390_address ad;
if (!s390_decompose_address (addr, &ad))
legitimate_reload_fp_constant_p (rtx op)
{
/* Accept floating-point zero operands if the load zero instruction
- can be used. */
+ can be used. Prior to z196 the load fp zero instruction caused a
+ performance penalty if the result is used as a BFP number. */
if (TARGET_Z196
&& GET_CODE (op) == CONST_DOUBLE
&& s390_float_const_zero_p (op))
it is most likely being used as an address, so
prefer ADDR_REGS. If 'class' is not a superset
of ADDR_REGS, e.g. FP_REGS, reject this reload. */
+ case CONST:
+ /* A larl operand with an odd addend will get fixed via secondary
+ reload, so don't request it to be pushed into the literal
+ pool. */
+ if (TARGET_CPU_ZARCH
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
+ {
+ if (reg_class_subset_p (ADDR_REGS, rclass))
+ return ADDR_REGS;
+ else
+ return NO_REGS;
+ }
+ /* fallthrough */
case LABEL_REF:
case SYMBOL_REF:
- case CONST:
if (!legitimate_reload_constant_p (op))
return NO_REGS;
/* fallthrough */
HOST_WIDE_INT addend;
rtx symref;
- if (!s390_symref_operand_p (addr, &symref, &addend))
+ if (!s390_loadrelative_operand_p (addr, &symref, &addend))
+ return false;
+
+ if (addend & (alignment - 1))
return false;
- return (!SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)
- && !(addend & (alignment - 1)));
+ if (GET_CODE (symref) == SYMBOL_REF
+ && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
+ return true;
+
+ if (GET_CODE (symref) == UNSPEC
+ && alignment <= UNITS_PER_LONG)
+ return true;
+
+ return false;
}
/* ADDR is moved into REG using larl. If ADDR isn't a valid larl
HOST_WIDE_INT addend;
rtx symref;
- if (!s390_symref_operand_p (addr, &symref, &addend))
+ if (!s390_loadrelative_operand_p (addr, &symref, &addend))
gcc_unreachable ();
if (!(addend & 1))
emit_move_insn (scratch, symref);
/* Increment the address using la in order to avoid clobbering cc. */
- emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
+ s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
}
}
/* On z10 several optimizer steps may generate larl operands with
an odd addend. */
if (in_p
- && s390_symref_operand_p (x, &symref, &offset)
+ && s390_loadrelative_operand_p (x, &symref, &offset)
&& mode == Pmode
&& !SYMBOL_REF_ALIGN1_P (symref)
&& (offset & 1) == 1)
or if the symref addend of a SI or DI move is not aligned to the
width of the access. */
if (MEM_P (x)
- && s390_symref_operand_p (XEXP (x, 0), NULL, NULL)
+ && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
&& (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
|| (!TARGET_ZARCH && mode == DImode)
|| ((mode == HImode || mode == SImode || mode == DImode)
/* We need a scratch register when loading a PLUS expression which
is not a legitimate operand of the LOAD ADDRESS instruction. */
- if (in_p && s390_plus_operand (x, mode))
+ /* LRA deals with the transformation of a PLUS operand well on
+ its own, so we don't need to prompt LRA in this case. */
+ if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
sri->icode = (TARGET_64BIT ?
CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
return false;
/* Avoid LA instructions with index register on z196; it is
- preferable to use regular add instructions when possible. */
+ preferable to use regular add instructions when possible.
+ Starting with zEC12 the la with index register is "uncracked"
+ again. */
if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
return false;
legitimize_pic_address (rtx orig, rtx reg)
{
rtx addr = orig;
+ rtx addend = const0_rtx;
rtx new_rtx = orig;
- rtx base;
gcc_assert (!TLS_SYMBOLIC_CONST (addr));
- if (GET_CODE (addr) == LABEL_REF
- || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)))
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == PLUS)
{
- /* This is a local symbol. */
- if (TARGET_CPU_ZARCH && larl_operand (addr, VOIDmode))
- {
- /* Access local symbols PC-relative via LARL.
- This is the same as in the non-PIC case, so it is
- handled automatically ... */
- }
+ addend = XEXP (addr, 1);
+ addr = XEXP (addr, 0);
+ }
+
+ if ((GET_CODE (addr) == LABEL_REF
+ || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
+ || (GET_CODE (addr) == UNSPEC &&
+ (XINT (addr, 1) == UNSPEC_GOTENT
+ || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
+ && GET_CODE (addend) == CONST_INT)
+ {
+ /* This can be locally addressed. */
+
+ /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
+ rtx const_addr = (GET_CODE (addr) == UNSPEC ?
+ gen_rtx_CONST (Pmode, addr) : addr);
+
+ if (TARGET_CPU_ZARCH
+ && larl_operand (const_addr, VOIDmode)
+ && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
+ && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
+ {
+ if (INTVAL (addend) & 1)
+ {
+ /* LARL can't handle odd offsets, so emit a pair of LARL
+ and LA. */
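+		  /* E.g. for a local symbol sym with addend 5 this yields
+		       larl temp,sym
+		       la   <target>,5(temp)
+		     (sketch only; an out-of-range addend is first folded
+		     into the larl operand as an even value plus an la
+		     displacement of 1). */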
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (!DISP_IN_RANGE (INTVAL (addend)))
+ {
+ HOST_WIDE_INT even = INTVAL (addend) - 1;
+ addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
+ addr = gen_rtx_CONST (Pmode, addr);
+ addend = const1_rtx;
+ }
+
+ emit_move_insn (temp, addr);
+ new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
+
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ /* If the offset is even, we can just use LARL. This
+ will happen automatically. */
+ }
+ }
else
- {
- /* Access local symbols relative to the GOT. */
+ {
+ /* No larl available - access local symbols relative to the GOT. */
- rtx temp = reg? reg : gen_reg_rtx (Pmode);
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
- addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
- addr = gen_rtx_CONST (Pmode, addr);
- addr = force_const_mem (Pmode, addr);
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ if (addend != const0_rtx)
+ addr = gen_rtx_PLUS (Pmode, addr, addend);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
emit_move_insn (temp, addr);
- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
- if (reg != 0)
- {
- s390_load_address (reg, new_rtx);
- new_rtx = reg;
- }
- }
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
}
- else if (GET_CODE (addr) == SYMBOL_REF)
+ else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
{
+ /* A non-local symbol reference without an addend.
+
+ The symbol ref is wrapped into an UNSPEC to make sure the
+ proper operand modifier (@GOT or @GOTENT) will be emitted.
+ This will tell the linker to put the symbol into the GOT.
+
+ Additionally the code dereferencing the GOT slot is emitted here.
+
+ An addend to the symref needs to be added afterwards;
+ legitimize_pic_address calls itself recursively to handle
+ that case, so there is no need to do it here. */
+
if (reg == 0)
reg = gen_reg_rtx (Pmode);
- if (flag_pic == 1)
+ if (TARGET_Z10)
+ {
+ /* Use load relative if possible.
+ lgrl <target>, sym@GOTENT */
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
+
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ else if (flag_pic == 1)
{
- /* Assume GOT offset < 4k. This is handled the same way
- in both 31- and 64-bit code (@GOT). */
+ /* Assume GOT offset is a valid displacement operand (< 4k
+ or < 512k with z990). This is handled the same way in
+ both 31- and 64-bit code (@GOT).
+ lg <target>, sym@GOT(r12) */
if (reload_in_progress || reload_completed)
df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
else if (TARGET_CPU_ZARCH)
{
/* If the GOT offset might be >= 4k, we determine the position
- of the GOT entry via a PC-relative LARL (@GOTENT). */
+ of the GOT entry via a PC-relative LARL (@GOTENT).
+ larl temp, sym@GOTENT
+ lg <target>, 0(temp) */
rtx temp = reg ? reg : gen_reg_rtx (Pmode);
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
new_rtx = gen_rtx_CONST (Pmode, new_rtx);
- emit_move_insn (temp, new_rtx);
+ emit_move_insn (temp, new_rtx);
- new_rtx = gen_const_mem (Pmode, temp);
+ new_rtx = gen_const_mem (Pmode, temp);
emit_move_insn (reg, new_rtx);
+
new_rtx = reg;
}
else
{
/* If the GOT offset might be >= 4k, we have to load it
- from the literal pool (@GOT). */
+ from the literal pool (@GOT).
+
+ lg temp, lit-litbase(r13)
+ lg <target>, 0(temp)
+ lit: .long sym@GOT */
rtx temp = reg ? reg : gen_reg_rtx (Pmode);
new_rtx = reg;
}
}
- else
+ else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
{
- if (GET_CODE (addr) == CONST)
+ gcc_assert (XVECLEN (addr, 0) == 1);
+ switch (XINT (addr, 1))
{
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == UNSPEC)
- {
- gcc_assert (XVECLEN (addr, 0) == 1);
- switch (XINT (addr, 1))
- {
- /* If someone moved a GOT-relative UNSPEC
- out of the literal pool, force them back in. */
- case UNSPEC_GOTOFF:
- case UNSPEC_PLTOFF:
- new_rtx = force_const_mem (Pmode, orig);
- break;
-
- /* @GOT is OK as is if small. */
- case UNSPEC_GOT:
- if (flag_pic == 2)
- new_rtx = force_const_mem (Pmode, orig);
- break;
+ /* These address symbols (or PLT slots) relative to the GOT
+ (not GOT slots!). In general this will exceed the
+ displacement range, so these values belong in the literal
+ pool. */
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ new_rtx = force_const_mem (Pmode, orig);
+ break;
- /* @GOTENT is OK as is. */
- case UNSPEC_GOTENT:
- break;
-
- /* @PLT is OK as is on 64-bit, must be converted to
- GOT-relative @PLTOFF on 31-bit. */
- case UNSPEC_PLT:
- if (!TARGET_CPU_ZARCH)
- {
- rtx temp = reg? reg : gen_reg_rtx (Pmode);
-
- if (reload_in_progress || reload_completed)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
-
- addr = XVECEXP (addr, 0, 0);
- addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
- UNSPEC_PLTOFF);
- addr = gen_rtx_CONST (Pmode, addr);
- addr = force_const_mem (Pmode, addr);
- emit_move_insn (temp, addr);
-
- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
- if (reg != 0)
- {
- s390_load_address (reg, new_rtx);
- new_rtx = reg;
- }
- }
- break;
-
- /* Everything else cannot happen. */
- default:
- gcc_unreachable ();
- }
- }
- else
- gcc_assert (GET_CODE (addr) == PLUS);
- }
- if (GET_CODE (addr) == PLUS)
- {
- rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+ /* For -fPIC the GOT size might exceed the displacement
+ range so make sure the value is in the literal pool. */
+ case UNSPEC_GOT:
+ if (flag_pic == 2)
+ new_rtx = force_const_mem (Pmode, orig);
+ break;
- gcc_assert (!TLS_SYMBOLIC_CONST (op0));
- gcc_assert (!TLS_SYMBOLIC_CONST (op1));
+ /* For @GOTENT larl is used. This is handled like local
+ symbol refs. */
+ case UNSPEC_GOTENT:
+ gcc_unreachable ();
+ break;
- /* Check first to see if this is a constant offset
- from a local symbol reference. */
- if ((GET_CODE (op0) == LABEL_REF
- || (GET_CODE (op0) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (op0)))
- && GET_CODE (op1) == CONST_INT)
+ /* @PLT is OK as is on 64-bit, must be converted to
+ GOT-relative @PLTOFF on 31-bit. */
+ case UNSPEC_PLT:
+ if (!TARGET_CPU_ZARCH)
{
- if (TARGET_CPU_ZARCH
- && larl_operand (op0, VOIDmode)
- && INTVAL (op1) < (HOST_WIDE_INT)1 << 31
- && INTVAL (op1) >= -((HOST_WIDE_INT)1 << 31))
- {
- if (INTVAL (op1) & 1)
- {
- /* LARL can't handle odd offsets, so emit a
- pair of LARL and LA. */
- rtx temp = reg? reg : gen_reg_rtx (Pmode);
-
- if (!DISP_IN_RANGE (INTVAL (op1)))
- {
- HOST_WIDE_INT even = INTVAL (op1) - 1;
- op0 = gen_rtx_PLUS (Pmode, op0, GEN_INT (even));
- op0 = gen_rtx_CONST (Pmode, op0);
- op1 = const1_rtx;
- }
-
- emit_move_insn (temp, op0);
- new_rtx = gen_rtx_PLUS (Pmode, temp, op1);
-
- if (reg != 0)
- {
- s390_load_address (reg, new_rtx);
- new_rtx = reg;
- }
- }
- else
- {
- /* If the offset is even, we can just use LARL.
- This will happen automatically. */
- }
- }
- else
- {
- /* Access local symbols relative to the GOT. */
-
- rtx temp = reg? reg : gen_reg_rtx (Pmode);
-
- if (reload_in_progress || reload_completed)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
-
- addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
- UNSPEC_GOTOFF);
- addr = gen_rtx_PLUS (Pmode, addr, op1);
- addr = gen_rtx_CONST (Pmode, addr);
- addr = force_const_mem (Pmode, addr);
- emit_move_insn (temp, addr);
-
- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
- if (reg != 0)
- {
- s390_load_address (reg, new_rtx);
- new_rtx = reg;
- }
- }
+ rtx temp = reg? reg : gen_reg_rtx (Pmode);
+
+ if (reload_in_progress || reload_completed)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ addr = XVECEXP (addr, 0, 0);
+ addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
+ UNSPEC_PLTOFF);
+ if (addend != const0_rtx)
+ addr = gen_rtx_PLUS (Pmode, addr, addend);
+ addr = gen_rtx_CONST (Pmode, addr);
+ addr = force_const_mem (Pmode, addr);
+ emit_move_insn (temp, addr);
+
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+ if (reg != 0)
+ {
+ s390_load_address (reg, new_rtx);
+ new_rtx = reg;
+ }
}
+ else
+ /* On 64 bit larl can be used. This case is handled like
+ local symbol refs. */
+ gcc_unreachable ();
+ break;
- /* Now, check whether it is a GOT relative symbol plus offset
- that was pulled out of the literal pool. Force it back in. */
-
- else if (GET_CODE (op0) == UNSPEC
- && GET_CODE (op1) == CONST_INT
- && XINT (op0, 1) == UNSPEC_GOTOFF)
- {
- gcc_assert (XVECLEN (op0, 0) == 1);
-
- new_rtx = force_const_mem (Pmode, orig);
- }
+ /* Everything else cannot happen. */
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (addend != const0_rtx)
+ {
+ /* Otherwise, compute the sum. */
- /* Otherwise, compute the sum. */
- else
+ rtx base = legitimize_pic_address (addr, reg);
+ new_rtx = legitimize_pic_address (addend,
+ base == reg ? NULL_RTX : reg);
+ if (GET_CODE (new_rtx) == CONST_INT)
+ new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
+ else
+ {
+ if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
{
- base = legitimize_pic_address (XEXP (addr, 0), reg);
- new_rtx = legitimize_pic_address (XEXP (addr, 1),
- base == reg ? NULL_RTX : reg);
- if (GET_CODE (new_rtx) == CONST_INT)
- new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
- else
- {
- if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
- {
- base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
- new_rtx = XEXP (new_rtx, 1);
- }
- new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
- }
-
- if (GET_CODE (new_rtx) == CONST)
- new_rtx = XEXP (new_rtx, 0);
- new_rtx = force_operand (new_rtx, 0);
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ new_rtx = XEXP (new_rtx, 1);
}
+ new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
}
+
+ if (GET_CODE (new_rtx) == CONST)
+ new_rtx = XEXP (new_rtx, 0);
+ new_rtx = force_operand (new_rtx, 0);
}
+
return new_rtx;
}
/* Emit code to move LEN bytes from DST to SRC. */
-void
+bool
s390_expand_movmem (rtx dst, rtx src, rtx len)
{
+ /* When tuning for z10 or higher we rely on the Glibc functions to
+ do the right thing. Only for constant lengths below 64k do we
+ generate inline code. */
+ if (s390_tune >= PROCESSOR_2097_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
+ return false;
+
if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
{
if (INTVAL (len) > 0)
convert_to_mode (Pmode, count, 1)));
emit_label (end_label);
}
+ return true;
}
/* Emit code to set LEN bytes at DST to VAL.
/* Emit code to compare LEN bytes at OP0 with those at OP1,
and return the result in TARGET. */
-void
+bool
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
rtx tmp;
+ /* When tuning for z10 or higher we rely on the Glibc functions to
+ do the right thing. Only for constant lengths below 64k do we
+ generate inline code. */
+ if (s390_tune >= PROCESSOR_2097_Z10
+ && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
+ return false;
+
/* As the result of CMPINT is inverted compared to what we need,
we have to swap the operands. */
tmp = op0; op0 = op1; op1 = tmp;
emit_insn (gen_cmpint (target, ccreg));
}
+ return true;
}
{
int bitsize = INTVAL (op1);
int bitpos = INTVAL (op2);
+ enum machine_mode mode = GET_MODE (dest);
+ enum machine_mode smode;
+ int smode_bsize, mode_bsize;
+ rtx op, clobber;
- /* On z10 we can use the risbg instruction to implement insv. */
- if (TARGET_Z10
- && ((GET_MODE (dest) == DImode && GET_MODE (src) == DImode)
- || (GET_MODE (dest) == SImode && GET_MODE (src) == SImode)))
+ if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
+ return false;
+
+ /* Generate INSERT IMMEDIATE (IILL et al). */
+ /* (set (ze (reg)) (const_int)). */
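+ /* Sketch of the loop below: the constant is inserted piecewise,
+ least significant chunk first, using 16-bit inserts or a single
+ 32-bit insert where TARGET_EXTIMM and 32-bit alignment allow. */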
+ if (TARGET_ZARCH
+ && register_operand (dest, word_mode)
+ && (bitpos % 16) == 0
+ && (bitsize % 16) == 0
+ && const_int_operand (src, VOIDmode))
{
- rtx op;
- rtx clobber;
+ HOST_WIDE_INT val = INTVAL (src);
+ int regpos = bitpos + bitsize;
- op = gen_rtx_SET (GET_MODE(src),
- gen_rtx_ZERO_EXTRACT (GET_MODE (dest), dest, op1, op2),
- src);
- clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+ while (regpos > bitpos)
+ {
+ enum machine_mode putmode;
+ int putsize;
+ if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
+ putmode = SImode;
+ else
+ putmode = HImode;
+
+ putsize = GET_MODE_BITSIZE (putmode);
+ regpos -= putsize;
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
+ GEN_INT (putsize),
+ GEN_INT (regpos)),
+ gen_int_mode (val, putmode));
+ val >>= putsize;
+ }
+ gcc_assert (regpos == bitpos);
return true;
}
- /* We need byte alignment. */
- if (bitsize % BITS_PER_UNIT)
- return false;
+ smode = smallest_mode_for_size (bitsize, MODE_INT);
+ smode_bsize = GET_MODE_BITSIZE (smode);
+ mode_bsize = GET_MODE_BITSIZE (mode);
+ /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
if (bitpos == 0
- && memory_operand (dest, VOIDmode)
+ && (bitsize % BITS_PER_UNIT) == 0
+ && MEM_P (dest)
&& (register_operand (src, word_mode)
|| const_int_operand (src, VOIDmode)))
{
/* Emit standard pattern if possible. */
- enum machine_mode mode = smallest_mode_for_size (bitsize, MODE_INT);
- if (GET_MODE_BITSIZE (mode) == bitsize)
- emit_move_insn (adjust_address (dest, mode, 0), gen_lowpart (mode, src));
+ if (smode_bsize == bitsize)
+ {
+ emit_move_insn (adjust_address (dest, smode, 0),
+ gen_lowpart (smode, src));
+ return true;
+ }
/* (set (ze (mem)) (const_int)). */
else if (const_int_operand (src, VOIDmode))
{
int size = bitsize / BITS_PER_UNIT;
- rtx src_mem = adjust_address (force_const_mem (word_mode, src), BLKmode,
- GET_MODE_SIZE (word_mode) - size);
+ rtx src_mem = adjust_address (force_const_mem (word_mode, src),
+ BLKmode,
+ UNITS_PER_WORD - size);
dest = adjust_address (dest, BLKmode, 0);
set_mem_size (dest, size);
s390_expand_movmem (dest, src_mem, GEN_INT (size));
+ return true;
}
/* (set (ze (mem)) (reg)). */
else if (register_operand (src, word_mode))
{
- if (bitsize <= GET_MODE_BITSIZE (SImode))
+ if (bitsize <= 32)
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
const0_rtx), src);
else
{
/* Emit st,stcmh sequence. */
- int stcmh_width = bitsize - GET_MODE_BITSIZE (SImode);
+ int stcmh_width = bitsize - 32;
int size = stcmh_width / BITS_PER_UNIT;
emit_move_insn (adjust_address (dest, SImode, size),
gen_lowpart (SImode, src));
set_mem_size (dest, size);
- emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, GEN_INT
- (stcmh_width), const0_rtx),
- gen_rtx_LSHIFTRT (word_mode, src, GEN_INT
- (GET_MODE_BITSIZE (SImode))));
+ emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
+ GEN_INT (stcmh_width),
+ const0_rtx),
+ gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
}
+ return true;
}
- else
- return false;
+ }
- return true;
+ /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
+ if ((bitpos % BITS_PER_UNIT) == 0
+ && (bitsize % BITS_PER_UNIT) == 0
+ && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
+ && MEM_P (src)
+ && (mode == DImode || mode == SImode)
+ && register_operand (dest, mode))
+ {
+ /* Emit a strict_low_part pattern if possible. */
+ if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
+ {
+ op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
+ op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+ return true;
+ }
+
+ /* ??? There are more powerful versions of ICM that are not
+ completely represented in the md file. */
}
- /* (set (ze (reg)) (const_int)). */
- if (TARGET_ZARCH
- && register_operand (dest, word_mode)
- && (bitpos % 16) == 0
- && (bitsize % 16) == 0
- && const_int_operand (src, VOIDmode))
+ /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
+ if (TARGET_Z10 && (mode == DImode || mode == SImode))
{
- HOST_WIDE_INT val = INTVAL (src);
- int regpos = bitpos + bitsize;
+ enum machine_mode mode_s = GET_MODE (src);
- while (regpos > bitpos)
+ if (mode_s == VOIDmode)
{
- enum machine_mode putmode;
- int putsize;
+ /* Assume const_int etc already in the proper mode. */
+ src = force_reg (mode, src);
+ }
+ else if (mode_s != mode)
+ {
+ gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
+ src = force_reg (mode_s, src);
+ src = gen_lowpart (mode, src);
+ }
- if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
- putmode = SImode;
- else
- putmode = HImode;
+ op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
+ op = gen_rtx_SET (VOIDmode, op, src);
- putsize = GET_MODE_BITSIZE (putmode);
- regpos -= putsize;
- emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
- GEN_INT (putsize),
- GEN_INT (regpos)),
- gen_int_mode (val, putmode));
- val >>= putsize;
+ if (!TARGET_ZEC12)
+ {
+ clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+ op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
}
- gcc_assert (regpos == bitpos);
+ emit_insn (op);
+
return true;
}
/* As we already have some offset, evaluate the remaining distance. */
ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
NULL_RTX, 1, OPTAB_DIRECT);
+ }
+ /* Shift is the byte count, but we need the bitcount. */
+ ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
+ NULL_RTX, 1, OPTAB_DIRECT);
+
+ /* Calculate masks. */
+ ac->modemask = expand_simple_binop (SImode, ASHIFT,
+ GEN_INT (GET_MODE_MASK (mode)),
+ ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
+ ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
+ NULL_RTX, 1);
+}
+
+/* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
+ use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
+ perform the merge in SEQ2. */
+
+static rtx
+s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
+ enum machine_mode mode, rtx val, rtx ins)
+{
+ rtx tmp;
+
+ if (ac->aligned)
+ {
+ start_sequence ();
+ tmp = copy_to_mode_reg (SImode, val);
+ if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
+ const0_rtx, ins))
+ {
+ *seq1 = NULL;
+ *seq2 = get_insns ();
+ end_sequence ();
+ return tmp;
+ }
+ end_sequence ();
}
- /* Shift is the byte count, but we need the bitcount. */
- ac->shift = expand_simple_binop (SImode, MULT, ac->shift, GEN_INT (BITS_PER_UNIT),
- NULL_RTX, 1, OPTAB_DIRECT);
- /* Calculate masks. */
- ac->modemask = expand_simple_binop (SImode, ASHIFT,
- GEN_INT (GET_MODE_MASK (mode)), ac->shift,
- NULL_RTX, 1, OPTAB_DIRECT);
- ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
+
+ /* Failed to use insv. Generate a two part shift and mask. */
+ start_sequence ();
+ tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
+ *seq1 = get_insns ();
+ end_sequence ();
+
+ start_sequence ();
+ tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
+ *seq2 = get_insns ();
+ end_sequence ();
+
+ return tmp;
}
/* Expand an atomic compare and swap operation for HImode and QImode. MEM is
- the memory location, CMP the old value to compare MEM with and NEW_RTX the value
- to set if CMP == MEM.
- CMP is never in memory for compare_and_swap_cc because
- expand_bool_compare_and_swap puts it into a register for later compare. */
+ the memory location, CMP the old value to compare MEM with and NEW_RTX the
+ value to set if CMP == MEM. */
void
-s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx)
+s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
+ rtx cmp, rtx new_rtx, bool is_weak)
{
struct alignment_context ac;
- rtx cmpv, newv, val, resv, cc;
+ rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
rtx res = gen_reg_rtx (SImode);
- rtx csloop = gen_label_rtx ();
- rtx csend = gen_label_rtx ();
+ rtx csloop = NULL, csend = NULL;
- gcc_assert (register_operand (target, VOIDmode));
gcc_assert (MEM_P (mem));
init_alignment_context (&ac, mem, mode);
- /* Shift the values to the correct bit positions. */
- if (!(ac.aligned && MEM_P (cmp)))
- cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
- if (!(ac.aligned && MEM_P (new_rtx)))
- new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
-
/* Load full word. Subsequent loads are performed by CS. */
val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
NULL_RTX, 1, OPTAB_DIRECT);
+ /* Prepare insertions of cmp and new_rtx into the loaded value. When
+ possible, we try to use insv to make this happen efficiently. If
+ that fails we'll generate code both inside and outside the loop. */
+ cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
+ newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
+
+ if (seq0)
+ emit_insn (seq0);
+ if (seq1)
+ emit_insn (seq1);
+
/* Start CS loop. */
- emit_label (csloop);
+ if (!is_weak)
+ {
+ /* Begin assuming success. */
+ emit_move_insn (btarget, const1_rtx);
+
+ csloop = gen_label_rtx ();
+ csend = gen_label_rtx ();
+ emit_label (csloop);
+ }
+
/* val = "<mem>00..0<mem>"
* cmp = "00..0<cmp>00..0"
* new = "00..0<new>00..0"
*/
- /* Patch cmp and new with val at correct position. */
- if (ac.aligned && MEM_P (cmp))
- {
- cmpv = force_reg (SImode, val);
- store_bit_field (cmpv, GET_MODE_BITSIZE (mode), 0,
- 0, 0, SImode, cmp);
- }
+ emit_insn (seq2);
+ emit_insn (seq3);
+
+ cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
+ if (is_weak)
+ emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
else
- cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
- NULL_RTX, 1, OPTAB_DIRECT));
- if (ac.aligned && MEM_P (new_rtx))
{
- newv = force_reg (SImode, val);
- store_bit_field (newv, GET_MODE_BITSIZE (mode), 0,
- 0, 0, SImode, new_rtx);
- }
- else
- newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
- NULL_RTX, 1, OPTAB_DIRECT));
+ rtx tmp;
- /* Jump to end if we're done (likely?). */
- s390_emit_jump (csend, s390_emit_compare_and_swap (EQ, res, ac.memsi,
- cmpv, newv));
+ /* Jump to end if we're done (likely?). */
+ s390_emit_jump (csend, cc);
- /* Check for changes outside mode. */
- resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
- NULL_RTX, 1, OPTAB_DIRECT);
- cc = s390_emit_compare (NE, resv, val);
- emit_move_insn (val, resv);
- /* Loop internal if so. */
- s390_emit_jump (csloop, cc);
+ /* Check for changes outside mode, and loop internal if so.
+ Arrange the moves so that the compare is adjacent to the
+ branch so that we can generate CRJ. */
+ tmp = copy_to_reg (val);
+ force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
+ 1, OPTAB_DIRECT);
+ cc = s390_emit_compare (NE, val, tmp);
+ s390_emit_jump (csloop, cc);
- emit_label (csend);
+ /* Failed. */
+ emit_move_insn (btarget, const0_rtx);
+ emit_label (csend);
+ }
/* Return the correct part of the bitfield. */
- convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
- NULL_RTX, 1, OPTAB_DIRECT), 1);
+ convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
+ NULL_RTX, 1, OPTAB_DIRECT), 1);
}
/* Expand an atomic operation CODE of mode MODE. MEM is the memory location
{
struct s390_address ad;
- if (s390_symref_operand_p (addr, NULL, NULL))
+ if (s390_loadrelative_operand_p (addr, NULL, NULL))
{
if (!TARGET_Z10)
{
'C': print opcode suffix for branch condition.
'D': print opcode suffix for inverse branch condition.
'E': print opcode suffix for branch on index instruction.
- 'J': print tls_load/tls_gdcall/tls_ldcall suffix
'G': print the size of the operand in bytes.
+ 'J': print tls_load/tls_gdcall/tls_ldcall suffix
+ 'M': print the second word of a TImode operand.
+ 'N': print the second word of a DImode operand.
'O': print only the displacement of a memory reference.
'R': print only the base register of a memory reference.
'S': print S-type memory reference (base+displacement).
- 'N': print the second word of a DImode operand.
- 'M': print the second word of a TImode operand.
'Y': print shift count operand.
'b': print integer X as if it's an unsigned byte.
'c': print integer X as if it's a signed byte.
- 'x': print integer X as if it's an unsigned halfword.
+ 'e': "end" of DImode contiguous bitmask X.
+ 'f': "end" of SImode contiguous bitmask X.
'h': print integer X as if it's a signed halfword.
'i': print the first nonzero HImode part of X.
'j': print the first HImode part unequal to -1 of X.
'k': print the first nonzero SImode part of X.
'm': print the first SImode part unequal to -1 of X.
- 'o': print integer X as if it's an unsigned 32bit word. */
+ 'o': print integer X as if it's an unsigned 32bit word.
+ 's': "start" of DImode contiguous bitmask X.
+ 't': "start" of SImode contiguous bitmask X.
+ 'x': print integer X as if it's an unsigned halfword.
+*/
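+/* Illustration of the contiguous bitmask modifiers (a sketch, not
+   exhaustive): for an SImode mask such as 0xff0 the run covers bits
+   4..11 counting from the LSB, so 't' prints 52 and 'f' prints 59,
+   i.e. the start/end positions with bits numbered 0..63 from the
+   most significant bit as in the risbg-style instructions.  */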
void
print_operand (FILE *file, rtx x, int code)
{
+ HOST_WIDE_INT ival;
+
switch (code)
{
case 'C':
break;
case CONST_INT:
- if (code == 'b')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xff);
- else if (code == 'c')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xff) ^ 0x80) - 0x80);
- else if (code == 'x')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
- else if (code == 'h')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
- else if (code == 'i')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, HImode, 0));
- else if (code == 'j')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, HImode, -1));
- else if (code == 'k')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, SImode, 0));
- else if (code == 'm')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, SImode, -1));
- else if (code == 'o')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffffffff);
- else
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ ival = INTVAL (x);
+ switch (code)
+ {
+ case 0:
+ break;
+ case 'b':
+ ival &= 0xff;
+ break;
+ case 'c':
+ ival = ((ival & 0xff) ^ 0x80) - 0x80;
+ break;
+ case 'x':
+ ival &= 0xffff;
+ break;
+ case 'h':
+ ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
+ break;
+ case 'i':
+ ival = s390_extract_part (x, HImode, 0);
+ break;
+ case 'j':
+ ival = s390_extract_part (x, HImode, -1);
+ break;
+ case 'k':
+ ival = s390_extract_part (x, SImode, 0);
+ break;
+ case 'm':
+ ival = s390_extract_part (x, SImode, -1);
+ break;
+ case 'o':
+ ival &= 0xffffffff;
+ break;
+ case 'e': case 'f':
+ case 's': case 't':
+ {
+ int pos, len;
+ bool ok;
+
+ len = (code == 's' || code == 'e' ? 64 : 32);
+ ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
+ gcc_assert (ok);
+ if (code == 's' || code == 't')
+ ival = 64 - pos - len;
+ else
+ ival = 64 - 1 - pos;
+ }
+ break;
+ default:
+ output_operand_lossage ("invalid constant for output modifier '%c'", code);
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
break;
case CONST_DOUBLE:
{
rtx target, pat;
- if (GET_CODE (dep_rtx) == INSN)
- dep_rtx = PATTERN (dep_rtx);
+ if (NONJUMP_INSN_P (dep_rtx))
+ dep_rtx = PATTERN (dep_rtx);
if (GET_CODE (dep_rtx) == SET)
{
if (s390_tune != PROCESSOR_2084_Z990
&& s390_tune != PROCESSOR_2094_Z9_109
&& s390_tune != PROCESSOR_2097_Z10
- && s390_tune != PROCESSOR_2817_Z196)
+ && s390_tune != PROCESSOR_2817_Z196
+ && s390_tune != PROCESSOR_2827_ZEC12)
return priority;
switch (s390_safe_attr_type (insn))
case PROCESSOR_2817_Z196:
return 3;
case PROCESSOR_2097_Z10:
+ case PROCESSOR_2827_ZEC12:
return 2;
default:
return 1;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
- if (GET_CODE (insn) != JUMP_INSN)
+ if (! JUMP_P (insn))
continue;
pat = PATTERN (insn);
static rtx
s390_execute_label (rtx insn)
{
- if (GET_CODE (insn) == INSN
+ if (NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == PARALLEL
&& GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
&& XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
- if (GET_CODE (insn) == INSN
+ if (NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
&& XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
{
s390_add_execute (pool, insn);
}
- else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
{
rtx pool_ref = NULL_RTX;
find_constant_pool_ref (PATTERN (insn), &pool_ref);
if (INSN_P (insn))
replace_ltrel_base (&PATTERN (insn));
- if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ if (NONJUMP_INSN_P (insn) || CALL_P (insn))
{
rtx addr, pool_ref = NULL_RTX;
find_constant_pool_ref (PATTERN (insn), &pool_ref);
s390_add_execute (curr_pool, insn);
s390_add_pool_insn (curr_pool, insn);
}
- else if (GET_CODE (insn) == INSN || CALL_P (insn))
+ else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
{
rtx pool_ref = NULL_RTX;
find_constant_pool_ref (PATTERN (insn), &pool_ref);
}
}
- if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL)
+ if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
{
if (curr_pool)
s390_add_pool_insn (curr_pool, insn);
Those will have an effect on code size, which we need to
consider here. This calculation makes rather pessimistic
worst-case assumptions. */
- if (GET_CODE (insn) == CODE_LABEL)
+ if (LABEL_P (insn))
extra_size += 6;
if (chunk_size < S390_POOL_CHUNK_MIN
continue;
/* Pool chunks can only be inserted after BARRIERs ... */
- if (GET_CODE (insn) == BARRIER)
+ if (BARRIER_P (insn))
{
s390_end_pool (curr_pool, insn);
curr_pool = NULL;
if (!section_switch_p)
{
/* We can insert the barrier only after a 'real' insn. */
- if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN)
+ if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
continue;
if (get_attr_length (insn) == 0)
continue;
prev = prev_nonnote_insn (prev);
if (prev)
jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
- INSN_LOCATOR (prev));
+ INSN_LOCATION (prev));
else
jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
barrier = emit_barrier_after (jump);
Don't do that, however, if it is the label before
a jump table. */
- if (GET_CODE (insn) == CODE_LABEL
+ if (LABEL_P (insn)
&& (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
{
- rtx vec_insn = next_real_insn (insn);
- rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
- PATTERN (vec_insn) : NULL_RTX;
- if (!vec_pat
- || !(GET_CODE (vec_pat) == ADDR_VEC
- || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
+ rtx vec_insn = NEXT_INSN (insn);
+ if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
}
/* If we have a direct jump (conditional or unconditional)
or a casesi jump, check all potential targets. */
- else if (GET_CODE (insn) == JUMP_INSN)
+ else if (JUMP_P (insn))
{
rtx pat = PATTERN (insn);
+ rtx table;
+
if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
pat = XVECEXP (pat, 0, 0);
bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
}
}
- else if (GET_CODE (pat) == PARALLEL
- && XVECLEN (pat, 0) == 2
- && GET_CODE (XVECEXP (pat, 0, 0)) == SET
- && GET_CODE (XVECEXP (pat, 0, 1)) == USE
- && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
- {
- /* Find the jump table used by this casesi jump. */
- rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
- rtx vec_insn = next_real_insn (vec_label);
- rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
- PATTERN (vec_insn) : NULL_RTX;
- if (vec_pat
- && (GET_CODE (vec_pat) == ADDR_VEC
- || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
- {
- int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
-
- for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
- {
- rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
-
- if (s390_find_pool (pool_list, label)
- != s390_find_pool (pool_list, insn))
- bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
- }
+ else if (tablejump_p (insn, NULL, &table))
+ {
+ rtx vec_pat = PATTERN (table);
+ int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
+
+ for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
+ {
+ rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
+
+ if (s390_find_pool (pool_list, label)
+ != s390_find_pool (pool_list, insn))
+ bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
}
}
}
/* Insert base register reload insns at every far label. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- if (GET_CODE (insn) == CODE_LABEL
+ if (LABEL_P (insn)
&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
{
struct constant_pool *pool = s390_find_pool (pool_list, insn);
if (!curr_pool)
continue;
- if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
+ if (NONJUMP_INSN_P (insn) || CALL_P (insn))
{
rtx addr, pool_ref = NULL_RTX;
find_constant_pool_ref (PATTERN (insn), &pool_ref);
rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
rtx label = NEXT_INSN (curr_pool->pool_insn);
- if (jump && GET_CODE (jump) == JUMP_INSN
- && barrier && GET_CODE (barrier) == BARRIER
- && label && GET_CODE (label) == CODE_LABEL
+ if (jump && JUMP_P (jump)
+ && barrier && BARRIER_P (barrier)
+ && label && LABEL_P (label)
&& GET_CODE (PATTERN (jump)) == SET
&& SET_DEST (PATTERN (jump)) == pc_rtx
&& GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
{
rtx next_insn = NEXT_INSN (insn);
- if (GET_CODE (insn) == INSN
+ if (NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
&& XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
if (GET_CODE (setreg) == SUBREG)
{
rtx inner = SUBREG_REG (setreg);
- if (!GENERAL_REG_P (inner))
+ if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
return;
regno = subreg_regno (setreg);
}
- else if (GENERAL_REG_P (setreg))
+ else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
regno = REGNO (setreg);
else
return;
rtx cur_insn;
unsigned int i;
- memset (regs_ever_clobbered, 0, 16 * sizeof (int));
+ memset (regs_ever_clobbered, 0, 32 * sizeof (int));
/* For non-leaf functions we have to consider all call clobbered regs to be
clobbered. */
if (!crtl->is_leaf)
{
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 32; i++)
regs_ever_clobbered[i] = call_really_used_regs[i];
}
See expand_builtin_unwind_init. For regs_ever_live this is done by
reload. */
if (cfun->has_nonlocal_label)
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 32; i++)
if (!call_really_used_regs[i])
regs_ever_clobbered[i] = 1;
s390_frame_area (int *area_bottom, int *area_top)
{
int b, t;
- int i;
b = INT_MAX;
t = INT_MIN;
}
if (!TARGET_64BIT)
- for (i = 2; i < 4; i++)
- if (cfun_fpr_bit_p (i))
+ {
+ if (cfun_fpr_save_p (FPR4_REGNUM))
{
- b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
- t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
+ b = MIN (b, cfun_frame_layout.f4_offset);
+ t = MAX (t, cfun_frame_layout.f4_offset + 8);
}
-
+ if (cfun_fpr_save_p (FPR6_REGNUM))
+ {
+ b = MIN (b, cfun_frame_layout.f4_offset + 8);
+ t = MAX (t, cfun_frame_layout.f4_offset + 16);
+ }
+ }
*area_bottom = b;
*area_top = t;
}
{
int i, j;
- /* fprs 8 - 15 are call saved for 64 Bit ABI. */
- cfun_frame_layout.fpr_bitmap = 0;
- cfun_frame_layout.high_fprs = 0;
- if (TARGET_64BIT)
- for (i = 24; i < 32; i++)
- if (df_regs_ever_live_p (i) && !global_regs[i])
- {
- cfun_set_fpr_bit (i - 16);
- cfun_frame_layout.high_fprs++;
- }
-
/* Find first and last gpr to be saved. We trust regs_ever_live
data, except that we don't save and restore global registers.
s390_regs_ever_clobbered (clobbered_regs);
+ /* fprs 8 - 15 are call saved for 64 Bit ABI. */
+ if (!epilogue_completed)
+ {
+ cfun_frame_layout.fpr_bitmap = 0;
+ cfun_frame_layout.high_fprs = 0;
+ if (TARGET_64BIT)
+ for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
+ /* During reload we have to use the df_regs_ever_live info
+ since reload marks FPRs used as spill slots as live before
+ actually making the code changes. Without this we fail
+ during elimination offset verification. */
+ if ((clobbered_regs[i]
+ || (df_regs_ever_live_p (i)
+ && (lra_in_progress
+ || reload_in_progress
+ || crtl->saves_all_registers)))
+ && !global_regs[i])
+ {
+ cfun_set_fpr_save (i);
+ cfun_frame_layout.high_fprs++;
+ }
+ }
+
for (i = 0; i < 16; i++)
clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
min_fpr = 0;
for (i = min_fpr; i < max_fpr; i++)
- cfun_set_fpr_bit (i);
+ cfun_set_fpr_save (i + FPR0_REGNUM);
}
}
if (!TARGET_64BIT)
- for (i = 2; i < 4; i++)
- if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
- cfun_set_fpr_bit (i);
+ {
+ if (df_regs_ever_live_p (FPR4_REGNUM) && !global_regs[FPR4_REGNUM])
+ cfun_set_fpr_save (FPR4_REGNUM);
+ if (df_regs_ever_live_p (FPR6_REGNUM) && !global_regs[FPR6_REGNUM])
+ cfun_set_fpr_save (FPR6_REGNUM);
+ }
}
/* Fill cfun->machine with info about frame of current function. */
{
cfun_frame_layout.f4_offset
= (cfun_frame_layout.gprs_offset
- - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+ - 8 * (cfun_fpr_save_p (FPR4_REGNUM)
+ + cfun_fpr_save_p (FPR6_REGNUM)));
cfun_frame_layout.f0_offset
= (cfun_frame_layout.f4_offset
- - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+ - 8 * (cfun_fpr_save_p (FPR0_REGNUM)
+ + cfun_fpr_save_p (FPR2_REGNUM)));
}
else
{
cfun_frame_layout.f0_offset
= ((cfun_frame_layout.gprs_offset
& ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
- - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+ - 8 * (cfun_fpr_save_p (FPR0_REGNUM)
+ + cfun_fpr_save_p (FPR2_REGNUM)));
cfun_frame_layout.f4_offset
= (cfun_frame_layout.f0_offset
- - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+ - 8 * (cfun_fpr_save_p (FPR4_REGNUM)
+ + cfun_fpr_save_p (FPR6_REGNUM)));
}
}
else /* no backchain */
{
cfun_frame_layout.f4_offset
= (STACK_POINTER_OFFSET
- - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
+ - 8 * (cfun_fpr_save_p (FPR4_REGNUM)
+ + cfun_fpr_save_p (FPR6_REGNUM)));
cfun_frame_layout.f0_offset
= (cfun_frame_layout.f4_offset
- - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
+ - 8 * (cfun_fpr_save_p (FPR0_REGNUM)
+ + cfun_fpr_save_p (FPR2_REGNUM)));
cfun_frame_layout.gprs_offset
= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
- for (i = 0; i < 8; i++)
- if (cfun_fpr_bit_p (i))
+ for (i = FPR0_REGNUM; i <= FPR7_REGNUM; i++)
+ if (cfun_fpr_save_p (i))
cfun_frame_layout.frame_size += 8;
cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
{
HOST_WIDE_INT frame_size;
int base_used;
- int clobbered_regs[16];
+ int clobbered_regs[32];
/* On S/390 machines, we may need to perform branch splitting, which
will require both base and return address register. We have no
while (frame_size != cfun_frame_layout.frame_size);
}
+/* Remove the FPR clobbers from a tbegin insn if it can be proven that
+ the TX is nonescaping. A transaction is considered escaping if
+ there is at least one path from tbegin returning CC0 to the
+ function exit block without a tend.
+
+ The check so far has some limitations:
+ - only single tbegin/tend BBs are supported
+ - the first cond jump after tbegin must separate the CC0 path from ~CC0
+ - if CC is first copied to a GPR and the CC0 check is then done on
+ that GPR, this is not supported
+*/
+
+static void
+s390_optimize_nonescaping_tx (void)
+{
+ const unsigned int CC0 = 1 << 3;
+ basic_block tbegin_bb = NULL;
+ basic_block tend_bb = NULL;
+ basic_block bb;
+ rtx insn;
+ bool result = true;
+ int bb_index;
+ rtx tbegin_insn = NULL_RTX;
+
+ if (!cfun->machine->tbegin_p)
+ return;
+
+ for (bb_index = 0; bb_index < n_basic_blocks; bb_index++)
+ {
+ bb = BASIC_BLOCK (bb_index);
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ rtx ite, cc, pat, target;
+ unsigned HOST_WIDE_INT mask;
+
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+
+ if (GET_CODE (pat) != SET
+ || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
+ continue;
+
+ if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
+ {
+ rtx tmp;
+
+ tbegin_insn = insn;
+
+ /* Just return if the tbegin doesn't have clobbers. */
+ if (GET_CODE (PATTERN (insn)) != PARALLEL)
+ return;
+
+ if (tbegin_bb != NULL)
+ return;
+
+ /* Find the next conditional jump. */
+ for (tmp = NEXT_INSN (insn);
+ tmp != NULL_RTX;
+ tmp = NEXT_INSN (tmp))
+ {
+ if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
+ return;
+ if (!JUMP_P (tmp))
+ continue;
+
+ ite = SET_SRC (PATTERN (tmp));
+ if (GET_CODE (ite) != IF_THEN_ELSE)
+ continue;
+
+ cc = XEXP (XEXP (ite, 0), 0);
+ if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
+ || GET_MODE (cc) != CCRAWmode
+ || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
+ return;
+
+ if (bb->succs->length () != 2)
+ return;
+
+ mask = INTVAL (XEXP (XEXP (ite, 0), 1));
+ if (GET_CODE (XEXP (ite, 0)) == NE)
+ mask ^= 0xf;
+
+ if (mask == CC0)
+ target = XEXP (ite, 1);
+ else if (mask == (CC0 ^ 0xf))
+ target = XEXP (ite, 2);
+ else
+ return;
+
+ {
+ edge_iterator ei;
+ edge e1, e2;
+
+ ei = ei_start (bb->succs);
+ e1 = ei_safe_edge (ei);
+ ei_next (&ei);
+ e2 = ei_safe_edge (ei);
+
+ if (e2->flags & EDGE_FALLTHRU)
+ {
+ e2 = e1;
+ e1 = ei_safe_edge (ei);
+ }
+
+ if (!(e1->flags & EDGE_FALLTHRU))
+ return;
+
+ tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
+ }
+ if (tmp == BB_END (bb))
+ break;
+ }
+ }
+
+ if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
+ {
+ if (tend_bb != NULL)
+ return;
+ tend_bb = bb;
+ }
+ }
+ }
+
+ /* Either we successfully remove the FPR clobbers here or we are not
+ able to do anything for this TX. Either way there is no need to
+ look at this transaction again. */
+ cfun->machine->tbegin_p = false;
+
+ if (tbegin_bb == NULL || tend_bb == NULL)
+ return;
+
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
+ free_dominance_info (CDI_POST_DOMINATORS);
+
+ if (!result)
+ return;
+
+ PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0);
+ INSN_CODE (tbegin_insn) = -1;
+ df_insn_rescan (tbegin_insn);
+
+ return;
+}
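
To make the notion of a nonescaping transaction concrete, here is an illustrative user-level sketch (not taken from the patch; the function and variable names are made up). In the first function every path on which tbegin returns CC0 reaches a tend before the function exits, which is the shape the check above looks for; in the second the successful transaction escapes to the caller without a tend, so the FPR clobbers are kept.

static int counter;

/* Nonescaping: the CC0 (success) path always executes __builtin_tend
   before returning.  */
int
nonescaping_tx (void)
{
  if (__builtin_tbegin ((void *) 0) == 0)
    {
      counter++;
      __builtin_tend ();
      return 0;
    }
  return -1;   /* abort path; no tend required here */
}

/* Escaping: the function can return on the CC0 path without a tend,
   leaving the commit to the caller.  */
int
escaping_tx (void)
{
  return __builtin_tbegin ((void *) 0);
}
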
+
/* Update frame layout. Recompute actual register save data based on
current info and update regs_ever_live for the special registers.
May be called multiple times, but may never cause *more* registers
static void
s390_update_frame_layout (void)
{
- int clobbered_regs[16];
+ int clobbered_regs[32];
s390_register_info (clobbered_regs);
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}
+/* Return true if we use LRA instead of the reload pass. */
+static bool
+s390_lra_p (void)
+{
+ return s390_lra_flag;
+}
+
/* Return true if register FROM can be eliminated via register TO. */
static bool
int offset;
int next_fpr = 0;
- /* Complete frame layout. */
+ /* Try to get rid of the FPR clobbers. */
+ s390_optimize_nonescaping_tx ();
+ /* Complete frame layout. */
s390_update_frame_layout ();
/* Annotate all constant pool references to let the scheduler know
offset = cfun_frame_layout.f0_offset;
/* Save f0 and f2. */
- for (i = 0; i < 2; i++)
+ for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
{
- if (cfun_fpr_bit_p (i))
+ if (cfun_fpr_save_p (i))
{
- save_fpr (stack_pointer_rtx, offset, i + 16);
+ save_fpr (stack_pointer_rtx, offset, i);
offset += 8;
}
else if (!TARGET_PACKED_STACK)
/* Save f4 and f6. */
offset = cfun_frame_layout.f4_offset;
- for (i = 2; i < 4; i++)
+ for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
{
- if (cfun_fpr_bit_p (i))
+ if (cfun_fpr_save_p (i))
{
- insn = save_fpr (stack_pointer_rtx, offset, i + 16);
+ insn = save_fpr (stack_pointer_rtx, offset, i);
offset += 8;
/* If f4 and f6 are call clobbered they are saved due to stdargs and
therefore are not frame related. */
- if (!call_really_used_regs[i + 16])
+ if (!call_really_used_regs[i])
RTX_FRAME_RELATED_P (insn) = 1;
}
else if (!TARGET_PACKED_STACK)
offset = (cfun_frame_layout.f8_offset
+ (cfun_frame_layout.high_fprs - 1) * 8);
- for (i = 15; i > 7 && offset >= 0; i--)
- if (cfun_fpr_bit_p (i))
+ for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
+ if (cfun_fpr_save_p (i))
{
- insn = save_fpr (stack_pointer_rtx, offset, i + 16);
+ insn = save_fpr (stack_pointer_rtx, offset, i);
RTX_FRAME_RELATED_P (insn) = 1;
offset -= 8;
}
if (offset >= cfun_frame_layout.f8_offset)
- next_fpr = i + 16;
+ next_fpr = i;
}
if (!TARGET_PACKED_STACK)
- next_fpr = cfun_save_high_fprs_p ? 31 : 0;
+ next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
if (flag_stack_usage_info)
current_function_static_stack_size = cfun_frame_layout.frame_size;
offset = 0;
- for (i = 24; i <= next_fpr; i++)
- if (cfun_fpr_bit_p (i - 16))
+ for (i = FPR8_REGNUM; i <= next_fpr; i++)
+ if (cfun_fpr_save_p (i))
{
rtx addr = plus_constant (Pmode, stack_pointer_rtx,
cfun_frame_layout.frame_size
if (cfun_save_high_fprs_p)
{
next_offset = cfun_frame_layout.f8_offset;
- for (i = 24; i < 32; i++)
+ for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
{
- if (cfun_fpr_bit_p (i - 16))
+ if (cfun_fpr_save_p (i))
{
restore_fpr (frame_pointer,
offset + next_offset, i);
else
{
next_offset = cfun_frame_layout.f4_offset;
- for (i = 18; i < 20; i++)
+ /* f4, f6 */
+ for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
{
- if (cfun_fpr_bit_p (i - 16))
+ if (cfun_fpr_save_p (i))
{
restore_fpr (frame_pointer,
offset + next_offset, i);
return build_va_arg_indirect_ref (addr);
}
+/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
+ expanders.
+ DEST - Register location where CC will be stored.
+ TDB - Pointer to a 256 byte area where to store the transaction
+ diagnostic block. NULL if TDB is not needed.
+ RETRY - Retry count value. If non-NULL a retry loop for CC2
+ is emitted.
+ CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
+ of the tbegin instruction pattern. */
+
+void
+s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
+{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC3 = 1 << 0;
+ rtx abort_label = gen_label_rtx ();
+ rtx leave_label = gen_label_rtx ();
+ rtx retry_reg = gen_reg_rtx (SImode);
+ rtx retry_label = NULL_RTX;
+ rtx jump;
+ rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+
+ if (retry != NULL_RTX)
+ {
+ emit_move_insn (retry_reg, retry);
+ retry_label = gen_label_rtx ();
+ emit_label (retry_label);
+ }
+
+ if (clobber_fprs_p)
+ emit_insn (gen_tbegin_1 (tdb,
+ gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
+ else
+ emit_insn (gen_tbegin_nofloat_1 (tdb,
+ gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
+
+ jump = s390_emit_jump (abort_label,
+ gen_rtx_NE (VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM),
+ gen_rtx_CONST_INT (VOIDmode, CC0)));
+
+ JUMP_LABEL (jump) = abort_label;
+ LABEL_NUSES (abort_label) = 1;
+ add_reg_note (jump, REG_BR_PROB, very_unlikely);
+
+ /* Initialize CC return value. */
+ emit_move_insn (dest, const0_rtx);
+
+ s390_emit_jump (leave_label, NULL_RTX);
+ LABEL_NUSES (leave_label) = 1;
+ emit_barrier ();
+
+ /* Abort handler code. */
+
+ emit_label (abort_label);
+ if (retry != NULL_RTX)
+ {
+ rtx count = gen_reg_rtx (SImode);
+ jump = s390_emit_jump (leave_label,
+ gen_rtx_EQ (VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM),
+ gen_rtx_CONST_INT (VOIDmode, CC1 | CC3)));
+ LABEL_NUSES (leave_label) = 2;
+ add_reg_note (jump, REG_BR_PROB, very_unlikely);
+
+ /* CC2 - transient failure. Perform retry with ppa. */
+ emit_move_insn (count, retry);
+ emit_insn (gen_subsi3 (count, count, retry_reg));
+ emit_insn (gen_tx_assist (count));
+ jump = emit_jump_insn (gen_doloop_si64 (retry_label,
+ retry_reg,
+ retry_reg));
+ JUMP_LABEL (jump) = retry_label;
+ LABEL_NUSES (retry_label) = 1;
+ }
+
+ emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, gen_rtx_REG (CCRAWmode,
+ CC_REGNUM)),
+ UNSPEC_CC_TO_INT));
+ emit_label (leave_label);
+}
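
Schematically, and only as an editorial aid (this pseudocode is not part of the patch), the expander above emits control flow along the following lines when RETRY is non-NULL; "cc" stands for the condition code register and the real comparisons are done in CCRAWmode.

  retry_reg = retry;
retry_label:
  cc = TBEGIN (tdb);              /* tbegin_1 or tbegin_nofloat_1      */
  if (cc != 0)                    /* branch marked very unlikely       */
    goto abort_label;
  dest = 0;                       /* transaction started successfully  */
  goto leave_label;

abort_label:
  if (cc == 1 || cc == 3)         /* persistent abort: stop retrying   */
    goto leave_label;
  TX_ASSIST (retry - retry_reg);  /* CC2: transient abort, PPA hint    */
  if (--retry_reg != 0)           /* doloop_si64                       */
    goto retry_label;
  dest = cc;                      /* UNSPEC_CC_TO_INT                  */
leave_label:
  ;
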
/* Builtins. */
enum s390_builtin
{
- S390_BUILTIN_THREAD_POINTER,
- S390_BUILTIN_SET_THREAD_POINTER,
+ S390_BUILTIN_TBEGIN,
+ S390_BUILTIN_TBEGIN_NOFLOAT,
+ S390_BUILTIN_TBEGIN_RETRY,
+ S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+ S390_BUILTIN_TBEGINC,
+ S390_BUILTIN_TEND,
+ S390_BUILTIN_TABORT,
+ S390_BUILTIN_NON_TX_STORE,
+ S390_BUILTIN_TX_NESTING_DEPTH,
+ S390_BUILTIN_TX_ASSIST,
S390_BUILTIN_max
};
-static enum insn_code const code_for_builtin_64[S390_BUILTIN_max] = {
- CODE_FOR_get_tp_64,
- CODE_FOR_set_tp_64
-};
-
-static enum insn_code const code_for_builtin_31[S390_BUILTIN_max] = {
- CODE_FOR_get_tp_31,
- CODE_FOR_set_tp_31
+static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
+ CODE_FOR_tbegin,
+ CODE_FOR_tbegin_nofloat,
+ CODE_FOR_tbegin_retry,
+ CODE_FOR_tbegin_retry_nofloat,
+ CODE_FOR_tbeginc,
+ CODE_FOR_tend,
+ CODE_FOR_tabort,
+ CODE_FOR_ntstg,
+ CODE_FOR_etnd,
+ CODE_FOR_tx_assist
};
static void
s390_init_builtins (void)
{
- tree ftype;
-
- ftype = build_function_type_list (ptr_type_node, NULL_TREE);
- add_builtin_function ("__builtin_thread_pointer", ftype,
- S390_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+ tree ftype, uint64_type;
+
+ /* void foo (void) */
+ ftype = build_function_type_list (void_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* void foo (int) */
+ ftype = build_function_type_list (void_type_node, integer_type_node,
+ NULL_TREE);
+ add_builtin_function ("__builtin_tabort", ftype,
+ S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tx_assist", ftype,
+ S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* int foo (void *) */
+ ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin_nofloat", ftype,
+ S390_BUILTIN_TBEGIN_NOFLOAT,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* int foo (void *, int) */
+ ftype = build_function_type_list (integer_type_node, ptr_type_node,
+ integer_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin_retry", ftype,
+ S390_BUILTIN_TBEGIN_RETRY,
+ BUILT_IN_MD,
NULL, NULL_TREE);
-
- ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
- add_builtin_function ("__builtin_set_thread_pointer", ftype,
- S390_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
+ add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
+ S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+ BUILT_IN_MD,
NULL, NULL_TREE);
+
+ /* int foo (void) */
+ ftype = build_function_type_list (integer_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tx_nesting_depth", ftype,
+ S390_BUILTIN_TX_NESTING_DEPTH,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tend", ftype,
+ S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* void foo (uint64_t *, uint64_t) */
+ if (TARGET_64BIT)
+ uint64_type = long_unsigned_type_node;
+ else
+ uint64_type = long_long_unsigned_type_node;
+
+ ftype = build_function_type_list (void_type_node,
+ build_pointer_type (uint64_type),
+ uint64_type, NULL_TREE);
+ add_builtin_function ("__builtin_non_tx_store", ftype,
+ S390_BUILTIN_NON_TX_STORE,
+ BUILT_IN_MD, NULL, NULL_TREE);
}
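
As a usage sketch for the builtins registered above (illustrative only; the retry count and abort code are arbitrary, and the semantics are those implied by the expanders: a return value of 0 means the transaction was started successfully):

int
tx_increment (int *p)
{
  /* NULL TDB pointer: no transaction diagnostic block is stored.  */
  if (__builtin_tbegin_retry ((void *) 0, 5) == 0)
    {
      if (*p < 0)
        __builtin_tabort (256);   /* arbitrary user abort code */
      (*p)++;
      __builtin_tend ();
      return 0;
    }
  return -1;   /* persistent abort or retries exhausted */
}
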
/* Expand an expression EXP that calls a built-in function,
{
#define MAX_ARGS 2
- enum insn_code const *code_for_builtin =
- TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31;
-
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
enum insn_code icode;
if (icode == 0)
internal_error ("bad builtin fcode");
+ if (!TARGET_ZEC12)
+ error ("transactional execution builtins require zEC12 or later");
+
+ if (!TARGET_HTM && TARGET_ZEC12)
+ error ("transactional execution builtins not enabled (-mtx)");
+
+ /* Set a flag in the machine specific cfun part in order to support
+ saving/restoring of FPRs. */
+ if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
+ cfun->machine->tbegin_p = true;
+
nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
arity = 0;
if (arg == error_mark_node)
return NULL_RTX;
- if (arity > MAX_ARGS)
+ if (arity >= MAX_ARGS)
return NULL_RTX;
insn_op = &insn_data[icode].operand[arity + nonvoid];
op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
if (!(*insn_op->predicate) (op[arity], insn_op->mode))
- op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+ {
+ if (insn_op->predicate == memory_operand)
+ {
+ /* Don't move a NULL pointer into a register. Otherwise
+ we have to rely on combine being able to move it back
+ in order to get an immediate 0 in the instruction. */
+ if (op[arity] != const0_rtx)
+ op[arity] = copy_to_mode_reg (Pmode, op[arity]);
+ op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
+ }
+ else
+ op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+ }
+
arity++;
}
pat = GEN_FCN (icode) (op[0]);
break;
case 2:
- pat = GEN_FCN (icode) (target, op[0], op[1]);
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1]);
break;
default:
gcc_unreachable ();
}
if (TARGET_64BIT)
{
- for (i = 24; i < 32; i++)
+ for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
call_used_regs[i] = call_really_used_regs[i] = 0;
}
else
{
- for (i = 18; i < 20; i++)
- call_used_regs[i] = call_really_used_regs[i] = 0;
+ call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
+ call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
}
if (TARGET_SOFT_FLOAT)
{
- for (i = 16; i < 32; i++)
+ for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
call_used_regs[i] = fixed_regs[i] = 1;
}
}
next_insn = NEXT_INSN (insn);
- if (GET_CODE (insn) != INSN)
+ if (! NONJUMP_INSN_P (insn))
continue;
if (GET_CODE (PATTERN (insn)) == PARALLEL
/* Walk over the insns and do some >=z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10
- || s390_tune == PROCESSOR_2817_Z196)
+ || s390_tune == PROCESSOR_2817_Z196
+ || s390_tune == PROCESSOR_2827_ZEC12)
{
rtx insn;
bool insn_added_p = false;
ready[0] = tmp;
}
+
+/* The s390_sched_state variable tracks the state of the current or
+ the last instruction group.
+
+ 0,1,2 number of instructions scheduled in the current group
+ 3 the last group is complete - normal insns
+ 4 the last group was a cracked/expanded insn */
+
+static int s390_sched_state;
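+
+/* Transitions (see s390_sched_variable_issue below): a cracked or
+ expanded insn always leads to state 4, an endgroup or groupalone
+ insn to state 3. Any other insn advances 0 -> 1 -> 2 -> 3, starts
+ a new group when in state 3 (-> 1) and completes a cracked group
+ when in state 4 (-> 3). */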
+
+#define S390_OOO_SCHED_STATE_NORMAL 3
+#define S390_OOO_SCHED_STATE_CRACKED 4
+
+#define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
+#define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
+#define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
+#define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
+
+static unsigned int
+s390_get_sched_attrmask (rtx insn)
+{
+ unsigned int mask = 0;
+
+ if (get_attr_ooo_cracked (insn))
+ mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
+ if (get_attr_ooo_expanded (insn))
+ mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
+ if (get_attr_ooo_endgroup (insn))
+ mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
+ if (get_attr_ooo_groupalone (insn))
+ mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
+ return mask;
+}
+
+/* Return the scheduling score for INSN. The higher the score the
+ better. The score is calculated from the OOO scheduling attributes
+ of INSN and the scheduling state s390_sched_state. */
+static int
+s390_sched_score (rtx insn)
+{
+ unsigned int mask = s390_get_sched_attrmask (insn);
+ int score = 0;
+
+ switch (s390_sched_state)
+ {
+ case 0:
+ /* Try to put insns into the first slot which would otherwise
+ break a group. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+ || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
+ score += 5;
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
+ score += 10;
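+ /* fall through - the case 1 scoring below also applies here. */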
+ case 1:
+ /* Prefer not cracked insns while trying to put together a
+ group. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+ && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
+ && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
+ score += 10;
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
+ score += 5;
+ break;
+ case 2:
+ /* Prefer not cracked insns while trying to put together a
+ group. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+ && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
+ && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
+ score += 10;
+ /* Prefer endgroup insns in the last slot. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
+ score += 10;
+ break;
+ case S390_OOO_SCHED_STATE_NORMAL:
+ /* Prefer not cracked insns if the last was not cracked. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
+ && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
+ score += 5;
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
+ score += 10;
+ break;
+ case S390_OOO_SCHED_STATE_CRACKED:
+ /* Try to keep cracked insns together to prevent them from
+ interrupting groups. */
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+ || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
+ score += 5;
+ break;
+ }
+ return score;
+}
+
/* This function is called via hook TARGET_SCHED_REORDER before
issuing one insn from list READY which contains *NREADYP entries.
For target z10 it reorders load instructions to avoid early load
conflicts in the floating point pipeline */
static int
-s390_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+s390_sched_reorder (FILE *file, int verbose,
rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
if (s390_tune == PROCESSOR_2097_Z10)
if (reload_completed && *nreadyp > 1)
s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
+ if (s390_tune == PROCESSOR_2827_ZEC12
+ && reload_completed
+ && *nreadyp > 1)
+ {
+ int i;
+ int last_index = *nreadyp - 1;
+ int max_index = -1;
+ int max_score = -1;
+ rtx tmp;
+
+ /* Just move the insn with the highest score to the top (the
+ end) of the list. A full sort is not needed since a conflict
+ in the hazard recognition cannot happen. So the top insn in
+ the ready list will always be taken. */
+ for (i = last_index; i >= 0; i--)
+ {
+ int score;
+
+ if (recog_memoized (ready[i]) < 0)
+ continue;
+
+ score = s390_sched_score (ready[i]);
+ if (score > max_score)
+ {
+ max_score = score;
+ max_index = i;
+ }
+ }
+
+ if (max_index != -1)
+ {
+ if (max_index != last_index)
+ {
+ tmp = ready[max_index];
+ ready[max_index] = ready[last_index];
+ ready[last_index] = tmp;
+
+ if (verbose > 5)
+ fprintf (file,
+ "move insn %d to the top of list\n",
+ INSN_UID (ready[last_index]));
+ }
+ else if (verbose > 5)
+ fprintf (file,
+ "best insn %d already on top\n",
+ INSN_UID (ready[last_index]));
+ }
+
+ if (verbose > 5)
+ {
+ fprintf (file, "ready list ooo attributes - sched state: %d\n",
+ s390_sched_state);
+
+ for (i = last_index; i >= 0; i--)
+ {
+ if (recog_memoized (ready[i]) < 0)
+ continue;
+ fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
+ s390_sched_score (ready[i]));
+#define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
+ PRINT_OOO_ATTR (ooo_cracked);
+ PRINT_OOO_ATTR (ooo_expanded);
+ PRINT_OOO_ATTR (ooo_endgroup);
+ PRINT_OOO_ATTR (ooo_groupalone);
+#undef PRINT_OOO_ATTR
+ fprintf (file, "\n");
+ }
+ }
+ }
+
return s390_issue_rate ();
}
+
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
the scheduler has issued INSN. It stores the last issued insn into
last_scheduled_insn in order to make it available for
s390_sched_reorder. */
static int
-s390_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
- int verbose ATTRIBUTE_UNUSED,
- rtx insn, int more)
+s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
{
last_scheduled_insn = insn;
+ if (s390_tune == PROCESSOR_2827_ZEC12
+ && reload_completed
+ && recog_memoized (insn) >= 0)
+ {
+ unsigned int mask = s390_get_sched_attrmask (insn);
+
+ if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
+ || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
+ s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
+ else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
+ || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
+ s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
+ else
+ {
+ /* Only normal insns are left (mask == 0). */
+ switch (s390_sched_state)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case S390_OOO_SCHED_STATE_NORMAL:
+ if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
+ s390_sched_state = 1;
+ else
+ s390_sched_state++;
+
+ break;
+ case S390_OOO_SCHED_STATE_CRACKED:
+ s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
+ break;
+ }
+ }
+ if (verbose > 5)
+ {
+ fprintf (file, "insn %d: ", INSN_UID (insn));
+#define PRINT_OOO_ATTR(ATTR) \
+ fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
+ PRINT_OOO_ATTR (ooo_cracked);
+ PRINT_OOO_ATTR (ooo_expanded);
+ PRINT_OOO_ATTR (ooo_endgroup);
+ PRINT_OOO_ATTR (ooo_groupalone);
+#undef PRINT_OOO_ATTR
+ fprintf (file, "\n");
+ fprintf (file, "sched state: %d\n", s390_sched_state);
+ }
+ }
+
if (GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER)
return more - 1;
int max_ready ATTRIBUTE_UNUSED)
{
last_scheduled_insn = NULL_RTX;
+ s390_sched_state = 0;
}
/* This function checks the whole of insn X for memory references. The
unsigned i;
unsigned mem_count = 0;
- if (s390_tune != PROCESSOR_2097_Z10 && s390_tune != PROCESSOR_2817_Z196)
+ if (s390_tune != PROCESSOR_2097_Z10
+ && s390_tune != PROCESSOR_2817_Z196
+ && s390_tune != PROCESSOR_2827_ZEC12)
return nunroll;
/* Count the number of memory references within the loop body. */
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P s390_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"