/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2019 Free Software Foundation, Inc.
+ Copyright (C) 1991-2020 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
-#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
#endif
#endif
-/* Support targetm.vectorize.builtin_mask_for_load. */
-GTY(()) tree altivec_builtin_mask_for_load;
+/* Don't enable PC-relative addressing if the target does not support it. */
+#ifndef PCREL_SUPPORTED_BY_OS
+#define PCREL_SUPPORTED_BY_OS 0
+#endif
-/* Set to nonzero once AIX common-mode calls have been defined. */
-static GTY(()) int common_mode_defined;
+/* Support targetm.vectorize.builtin_mask_for_load. */
+tree altivec_builtin_mask_for_load;
#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup. */
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
-extern GTY(()) section *toc_section;
section *toc_section = 0;
/* Describe the vector unit used for modes. */
int rs6000_vector_align[NUM_MACHINE_MODES];
/* Map selected modes to types for builtins. */
-GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
+tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
reciprocal sqrt (frsqrte) for. */
machine_mode,
secondary_reload_info *,
bool);
+static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
+
+#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
+#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
+ rs6000_cannot_substitute_mem_equiv_p
\f
/* Processor table. */
spaces = 0;
}
else
- spaces += sizeof (" Reload=sl") - 1;
+ spaces += strlen (" Reload=sl");
if (reg_addr[m].scalar_in_vmx_p)
{
spaces = 0;
}
else
- spaces += sizeof (" Upper=y") - 1;
+ spaces += strlen (" Upper=y");
if (rs6000_vector_unit[m] != VECTOR_NONE
|| rs6000_vector_mem[m] != VECTOR_NONE)
&& TARGET_HARD_FLOAT
&& !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
| ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
- | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
+ | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
+ | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
}
/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
}
+ if (!TARGET_FPRND && TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
+ /* TARGET_VSX = 1 implies Power 7 and newer */
+ error ("%qs requires %qs", "-mvsx", "-mfprnd");
+ rs6000_isa_flags &= ~OPTION_MASK_FPRND;
+ }
+
if (TARGET_DIRECT_MOVE && !TARGET_VSX)
{
if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
}
/* Enable the default support for IEEE 128-bit floating point on Linux VSX
- sytems. In GCC 7, we would enable the the IEEE 128-bit floating point
+ systems. In GCC 7, we would enable the IEEE 128-bit floating point
infrastructure (-mfloat128-type) but not enable the actual __float128 type
unless the user used the explicit -mfloat128. In GCC 8, we enable both
the keyword as well as the type. */
if (!TARGET_VSX)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
- error ("%qs requires VSX support", "%<-mfloat128%>");
+ error ("%qs requires VSX support", "-mfloat128");
TARGET_FLOAT128_TYPE = 0;
rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
}
- /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
- if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
+ /* Enable -mprefixed by default on 'future' systems. */
+ if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
+ rs6000_isa_flags |= OPTION_MASK_PREFIXED;
+
+ /* -mprefixed requires -mcpu=future. */
+ else if (TARGET_PREFIXED && !TARGET_FUTURE)
{
- if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
- error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
- else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
- error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
+ error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
- rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
+ rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
}
/* -mpcrel requires prefixed load/store addressing. */
- if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
+ if (TARGET_PCREL && !TARGET_PREFIXED)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
- error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
+ error ("%qs requires %qs", "-mpcrel", "-mprefixed");
rs6000_isa_flags &= ~OPTION_MASK_PCREL;
}
SUB3TARGET_OVERRIDE_OPTIONS;
#endif
+ /* If the ABI has support for PC-relative relocations, enable it by default.
+ This test depends on the sub-target tests above setting the code model to
+ medium for ELF v2 systems. */
+ if (PCREL_SUPPORTED_BY_OS
+ && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
+ rs6000_isa_flags |= OPTION_MASK_PCREL;
+
/* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
after the subtarget override options are done. */
- if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
+ else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
str_align_loops = "16";
}
}
-
- if (flag_align_jumps && !str_align_jumps)
- str_align_jumps = "16";
- if (flag_align_loops && !str_align_loops)
- str_align_loops = "16";
}
/* Arrange to save and restore machine status around nested functions. */
if (global_init_p)
{
- maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
- rs6000_cost->simultaneous_prefetches,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
- rs6000_cost->cache_line_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_simultaneous_prefetches,
+ rs6000_cost->simultaneous_prefetches);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l1_cache_size,
+ rs6000_cost->l1_cache_size);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l1_cache_line_size,
+ rs6000_cost->cache_line_size);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l2_cache_size,
+ rs6000_cost->l2_cache_size);
/* Increase loop peeling limits based on performance analysis. */
- maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_max_peeled_insns, 400);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_max_completely_peeled_insns, 400);
/* Use the 'model' -fsched-pressure algorithm by default. */
- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
- SCHED_PRESSURE_MODEL,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_sched_pressure_algorithm,
+ SCHED_PRESSURE_MODEL);
- /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
- if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+ /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
+ turns -frename-registers on. */
+ if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
|| (global_options_set.x_flag_unroll_all_loops
&& flag_unroll_all_loops))
- && !global_options_set.x_unroll_only_small_loops)
- unroll_only_small_loops = 0;
+ {
+ if (!global_options_set.x_unroll_only_small_loops)
+ unroll_only_small_loops = 0;
+ if (!global_options_set.x_flag_rename_registers)
+ flag_rename_registers = 1;
+ }
/* If using typedef char *va_list, signal that
__builtin_va_start (&ap, 0) can be optimized to
static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
- if (TARGET_VSX)
- switch (mode)
- {
- case E_DFmode:
- return V2DFmode;
- default:;
- }
- if (TARGET_ALTIVEC || TARGET_VSX)
- switch (mode)
- {
- case E_SFmode:
- return V4SFmode;
- case E_TImode:
- return V1TImode;
- case E_DImode:
- return V2DImode;
- case E_SImode:
- return V4SImode;
- case E_HImode:
- return V8HImode;
- case E_QImode:
- return V16QImode;
- default:;
- }
+ opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
+
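+ /* E.g. an SImode scalar maps to V4SImode (16 / 4 = 4 lanes), provided the
+ vector unit actually supports that mode. */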
+ if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
+ return vmode.require ();
+
return word_mode;
}
return data;
}
+/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
+ For some statements, we would like to further fine-tune the cost on top of
+ the rs6000_builtin_vectorization_cost handling, which has no information
+ on statement operation codes etc. One typical case here is COND_EXPR: it
+ is costed the same as a simple FXU instruction when evaluating the scalar
+ cost, but it should be priced higher since it is transformed into either
+ compare + branch or compare + isel instructions. */
+
+static unsigned
+adjust_vectorization_cost (enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info)
+{
+ if (kind == scalar_stmt && stmt_info && stmt_info->stmt
+ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
+ {
+ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
+ if (subcode == COND_EXPR)
+ return 2;
+ }
+
+ return 0;
+}
+
/* Implement targetm.vectorize.add_stmt_cost. */
static unsigned
-rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
- struct _stmt_vec_info *stmt_info, int misalign,
- enum vect_cost_model_location where)
+rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
+ enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, tree vectype,
+ int misalign, enum vect_cost_model_location where)
{
rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
unsigned retval = 0;
if (flag_vect_cost_model)
{
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
misalign);
+ stmt_cost += adjust_vectorization_cost (kind, stmt_info);
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ if (where == vect_body && stmt_info
+ && stmt_in_inner_loop_p (vinfo, stmt_info))
count *= 50; /* FIXME. */
retval = (unsigned) (count * stmt_cost);
if (!bname)
return NULL_TREE;
- strcpy (name, bname + sizeof ("__builtin_") - 1);
+ strcpy (name, bname + strlen ("__builtin_"));
strcat (name, suffix);
if (n_args == 1)
num_insns_constant_gpr (HOST_WIDE_INT value)
{
/* signed constant loadable with addi */
- if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
+ if (SIGNED_INTEGER_16BIT_P (value))
return 1;
/* constant loadable with addis */
&& (value >> 31 == -1 || value >> 31 == 0))
return 1;
+ /* PADDI can support up to 34-bit signed integers. */
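+ /* E.g. 0x100000001 fits neither the addi nor the addis form above, but it
+ is within the 34-bit signed range, so a single pli can load it. */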
+ else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
+ return 1;
+
else if (TARGET_POWERPC64)
{
HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
&& rs6000_is_valid_and_mask (GEN_INT (low), DImode))
insns = 2;
total += insns;
- value >>= BITS_PER_WORD;
+ /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT (e.g. both
+ are 64), shifting by the full width at once would be undefined
+ behavior in C, so do it in two steps. */
+ value >>= (BITS_PER_WORD - 1);
+ value >>= 1;
}
return total;
}
}
}
+/* Return the offset within a memory object (MEM) of a vector type to a given
+ element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
+ the element is constant, we return a constant integer.
+
+ Otherwise, we use a base register temporary to calculate the offset after
+ masking it to fit within the bounds of the vector and scaling it. The
+ masking is required by the 64-bit ELF version 2 ABI for the vec_extract
+ built-in function. */
+
+static rtx
+get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
+{
+ if (CONST_INT_P (element))
+ return GEN_INT (INTVAL (element) * scalar_size);
+
+ /* All insns should use the 'Q' constraint (address is a single register) if
+ the element number is not a constant. */
+ gcc_assert (satisfies_constraint_Q (mem));
+
+ /* Mask the element to make sure the element number is between 0 and the
+ maximum number of elements - 1 so that we don't generate an address
+ outside the vector. */
+ rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
+ rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
+ emit_insn (gen_rtx_SET (base_tmp, and_op));
+
+ /* Shift the element to get the byte offset from the element number. */
+ int shift = exact_log2 (scalar_size);
+ gcc_assert (shift >= 0);
+
+ if (shift > 0)
+ {
+ rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+ emit_insn (gen_rtx_SET (base_tmp, shift_op));
+ }
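+
+ /* E.g. for V4SImode the element number is AND'ed with 3 and then shifted
+ left by 2 (log2 of the 4-byte element size) to form the byte offset. */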
+
+ return base_tmp;
+}
+
+/* Helper function to update PC-relative addresses when we are adjusting a
+ memory address (ADDR) of a vector to point to a scalar field within the
+ vector with a constant offset (ELEMENT_OFFSET). If the address is not
+ valid, we can use the base register temporary (BASE_TMP) to form the
+ address. */
+
+static rtx
+adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
+{
+ rtx new_addr = NULL;
+
+ gcc_assert (CONST_INT_P (element_offset));
+
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0);
+ rtx op1 = XEXP (addr, 1);
+
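+ /* If the addend is constant, fold the element offset into it; e.g.
+ "sym + 16" with an element offset of 8 becomes "sym + 24". */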
+ if (CONST_INT_P (op1))
+ {
+ HOST_WIDE_INT offset
+ = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
+
+ if (offset == 0)
+ new_addr = op0;
+
+ else
+ {
+ rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
+ new_addr = gen_rtx_CONST (Pmode, plus);
+ }
+ }
+
+ else
+ {
+ emit_move_insn (base_tmp, addr);
+ new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
+ }
+ }
+
+ else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
+ {
+ rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
+ new_addr = gen_rtx_CONST (Pmode, plus);
+ }
+
+ else
+ gcc_unreachable ();
+
+ return new_addr;
+}
+
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
temporary (BASE_TMP) to fixup the address. Return the new memory address
- that is valid for reads or writes to a given register (SCALAR_REG). */
+ that is valid for reads or writes to a given register (SCALAR_REG).
+
+ This function is expected to be called after reload is completed when we are
+ splitting insns. The temporary BASE_TMP might be set multiple times with
+ this code. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
{
unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
rtx addr = XEXP (mem, 0);
- rtx element_offset;
rtx new_addr;
- bool valid_addr_p;
+
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
/* Calculate what we need to add to the address to get the element
address. */
- if (CONST_INT_P (element))
- element_offset = GEN_INT (INTVAL (element) * scalar_size);
- else
- {
- int byte_shift = exact_log2 (scalar_size);
- gcc_assert (byte_shift >= 0);
-
- if (byte_shift == 0)
- element_offset = element;
-
- else
- {
- if (TARGET_POWERPC64)
- emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
- else
- emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
-
- element_offset = base_tmp;
- }
- }
+ rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
/* Create the new address pointing to the element within the vector. If we
are adding 0, we don't have to change the address. */
else if (REG_P (addr) || SUBREG_P (addr))
new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
+ /* For references to local static variables, fold a constant offset into the
+ address. */
+ else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
+ new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
+
/* Optimize D-FORM addresses with constant offset with a constant element, to
include the element offset in the address directly. */
else if (GET_CODE (addr) == PLUS)
{
rtx op0 = XEXP (addr, 0);
rtx op1 = XEXP (addr, 1);
- rtx insn;
gcc_assert (REG_P (op0) || SUBREG_P (op0));
if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
{
+ /* op0 should never be r0, because r0+offset is not valid. But it
+ doesn't hurt to make sure it is not r0. */
+ gcc_assert (reg_or_subregno (op0) != 0);
+
+ /* D-FORM address with constant element number. */
HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
rtx offset_rtx = GEN_INT (offset);
-
- if (IN_RANGE (offset, -32768, 32767)
- && (scalar_size < 8 || (offset & 0x3) == 0))
- new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
- else
- {
- emit_move_insn (base_tmp, offset_rtx);
- new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
- }
+ new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
}
else
{
- bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
- bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
+ /* If we don't have a D-FORM address with a constant element number,
+ add the two elements in the current address. Then add the offset.
- /* Note, ADDI requires the register being added to be a base
- register. If the register was R0, load it up into the temporary
- and do the add. */
- if (op1_reg_p
- && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
- {
- insn = gen_add3_insn (base_tmp, op1, element_offset);
- gcc_assert (insn != NULL_RTX);
- emit_insn (insn);
- }
-
- else if (ele_reg_p
- && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
- {
- insn = gen_add3_insn (base_tmp, element_offset, op1);
- gcc_assert (insn != NULL_RTX);
- emit_insn (insn);
- }
-
- else
- {
- emit_move_insn (base_tmp, op1);
- emit_insn (gen_add2_insn (base_tmp, element_offset));
- }
-
- new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
+ Previously, we tried to add the offset to OP1 and change the
+ address to an X-FORM format adding OP0 and BASE_TMP, but it became
+ complicated because we had to verify that op1 was not GPR0 and we
+ had a constant element offset (due to the way ADDI is defined).
+ Doing the add of OP0 and OP1 first, and then adding in the
+ offset, has the benefit that if D-FORM instructions are
+ allowed, the offset is part of the memory access to the vector
+ element. */
+ emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
+ new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
}
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If we have a PLUS, we need to see whether the particular register class
- allows for D-FORM or X-FORM addressing. */
- if (GET_CODE (new_addr) == PLUS)
- {
- rtx op1 = XEXP (new_addr, 1);
- addr_mask_type addr_mask;
- unsigned int scalar_regno = reg_or_subregno (scalar_reg);
-
- gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
- if (INT_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
-
- else if (FP_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
-
- else if (ALTIVEC_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
-
- else
- gcc_unreachable ();
-
- if (REG_P (op1) || SUBREG_P (op1))
- valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
- else
- valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
- }
+ /* If the address isn't valid, move the address into the temporary base
+ register. Some reasons it could not be valid include:
- else if (REG_P (new_addr) || SUBREG_P (new_addr))
- valid_addr_p = true;
+ The address offset overflowed the 16 or 34 bit offset size;
+ We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
+ We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
+ Only X_FORM loads can be done, and the address is D_FORM. */
- else
- valid_addr_p = false;
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
- if (!valid_addr_p)
+ if (iform == INSN_FORM_BAD)
{
emit_move_insn (base_tmp, new_addr);
new_addr = base_tmp;
systems. */
if (MEM_P (src))
{
- int num_elements = GET_MODE_NUNITS (mode);
- rtx num_ele_m1 = GEN_INT (num_elements - 1);
-
- emit_insn (gen_anddi3 (element, element, num_ele_m1));
- gcc_assert (REG_P (tmp_gpr));
- emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
- tmp_gpr, scalar_mode));
+ emit_move_insn (dest,
+ rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
+ scalar_mode));
return;
}
tree field = TYPE_FIELDS (type);
/* Skip all non field decls */
- while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ while (field != NULL
+ && (TREE_CODE (field) != FIELD_DECL
+ || DECL_FIELD_ABI_IGNORED (field)))
field = DECL_CHAIN (field);
if (field != NULL && field != type)
do {
tree field = TYPE_FIELDS (type);
/* Skip all non field decls */
- while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ while (field != NULL
+ && (TREE_CODE (field) != FIELD_DECL
+ || DECL_FIELD_ABI_IGNORED (field)))
field = DECL_CHAIN (field);
if (! field)
break;
break;
}
- if (TARGET_PREFIXED_ADDR)
+ if (TARGET_PREFIXED)
return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
else
return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
return rs6000_legitimize_tls_address_aix (addr, model);
dest = gen_reg_rtx (Pmode);
- if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
+ if (model == TLS_MODEL_LOCAL_EXEC
+ && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
{
rtx tlsreg;
them in the .got section. So use a pointer to the .got section,
not one to secondary TOC sections used by 64-bit -mminimal-toc,
or to secondary GOT sections used by 32-bit -fPIC. */
- if (TARGET_64BIT)
+ if (rs6000_pcrel_p (cfun))
+ got = const0_rtx;
+ else if (TARGET_64BIT)
got = gen_rtx_REG (Pmode, 2);
else
{
rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
- if (rs6000_tls_size == 16)
+ if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
{
if (TARGET_64BIT)
insn = gen_tls_dtprel_64 (dest, tmp1, addr);
else
insn = gen_tls_got_tprel_32 (tmp2, got, addr);
emit_insn (insn);
- if (TARGET_64BIT)
+ if (rs6000_pcrel_p (cfun))
+ {
+ if (TARGET_64BIT)
+ insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
+ else
+ insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
+ }
+ else if (TARGET_64BIT)
insn = gen_tls_tls_64 (dest, tmp2, addr);
else
insn = gen_tls_tls_32 (dest, tmp2, addr);
bool quad_offset_p = mode_supports_dq_form (mode);
/* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
- if (VECTOR_MEM_ALTIVEC_P (mode)
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -16)
{
HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
- if (TARGET_PREFIXED_ADDR)
+ if (TARGET_PREFIXED)
return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
else
return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
gen_lowpart (SImode,
copy_rtx (temp))));
}
+ else if (ud1 == ud3 && ud2 == ud4)
+ {
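+ /* The constant has two identical 32-bit halves, e.g. 0x1234567812345678:
+ load the sign-extended low half, then OR the zero-extended low word with
+ a copy shifted left by 32, a combination that typically matches a
+ rotate-and-insert pattern. */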
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+ HOST_WIDE_INT num = (ud2 << 16) | ud1;
+ rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
+ rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
+ rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
+ emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
+ }
else if ((ud4 == 0xffff && (ud3 & 0x8000))
|| (ud4 == 0 && ! (ud3 & 0x8000)))
{
if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
return false;
+ /* Allow SD<->DD changes, since SDmode values are stored in
+ the low half of the DDmode, just like target-independent
+ code expects. We need to allow at least SD->DD since
+ rs6000_secondary_memory_needed_mode asks for that change
+ to be made for SD reloads. */
+ if ((to == DDmode && from == SDmode)
+ || (to == SDmode && from == DDmode))
+ return true;
+
if (from_size < 8 || to_size < 8)
return false;
if (DEFAULT_ABI == ABI_AIX)
s += sprintf (s,
"l%s 2,%%%u\n\t",
- ptrload, funop + 2);
+ ptrload, funop + 3);
/* We don't need the extra code to stop indirect call speculation if
calling via LR. */
sprintf (s,
"b%%T%ul\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 3);
+ funop, ptrload, funop + 4);
else
sprintf (s,
"beq%%T%ul-\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 3);
+ funop, ptrload, funop + 4);
}
else if (DEFAULT_ABI == ABI_ELFv2)
{
sprintf (s,
"b%%T%ul\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 2);
+ funop, ptrload, funop + 3);
else
sprintf (s,
"beq%%T%ul-\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 2);
+ funop, ptrload, funop + 3);
}
else
{
gen_rtx_COMPARE (comp_mode, op0, op1)));
}
- /* Some kinds of FP comparisons need an OR operation;
- under flag_finite_math_only we don't bother. */
- if (FLOAT_MODE_P (mode)
- && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
- && !flag_finite_math_only
- && (code == LE || code == GE
- || code == UNEQ || code == LTGT
- || code == UNGT || code == UNLT))
- {
- enum rtx_code or1, or2;
- rtx or1_rtx, or2_rtx, compare2_rtx;
- rtx or_result = gen_reg_rtx (CCEQmode);
-
- switch (code)
- {
- case LE: or1 = LT; or2 = EQ; break;
- case GE: or1 = GT; or2 = EQ; break;
- case UNEQ: or1 = UNORDERED; or2 = EQ; break;
- case LTGT: or1 = LT; or2 = GT; break;
- case UNGT: or1 = UNORDERED; or2 = GT; break;
- case UNLT: or1 = UNORDERED; or2 = LT; break;
- default: gcc_unreachable ();
- }
- validate_condition_mode (or1, comp_mode);
- validate_condition_mode (or2, comp_mode);
- or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
- or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
- compare2_rtx = gen_rtx_COMPARE (CCEQmode,
- gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
- const_true_rtx);
- emit_insn (gen_rtx_SET (or_result, compare2_rtx));
-
- compare_result = or_result;
- code = EQ;
- }
-
validate_condition_mode (code, GET_MODE (compare_result));
return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
return scratch;
}
+/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
+ requires this. The result is mode MODE. */
+rtx
+rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
+{
+ rtx cond[2];
+ int n = 0;
+ if (code == LTGT || code == LE || code == UNLT)
+ cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
+ if (code == LTGT || code == GE || code == UNGT)
+ cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
+ if (code == LE || code == GE || code == UNEQ)
+ cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
+ if (code == UNLT || code == UNGT || code == UNEQ)
+ cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
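+ /* For example, LE decomposes into LT and EQ, and UNGT into GT and
+ UNORDERED; the two conditions are then OR'd into a single CR bit. */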
+
+ gcc_assert (n == 2);
+
+ rtx cc = gen_reg_rtx (CCEQmode);
+ rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
+ emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
+
+ return cc;
+}
+
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
- rtx condition_rtx;
- machine_mode op_mode;
- enum rtx_code cond_code;
- rtx result = operands[0];
+ rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
+ rtx_code cond_code = GET_CODE (condition_rtx);
- condition_rtx = rs6000_generate_compare (operands[1], mode);
- cond_code = GET_CODE (condition_rtx);
-
- if (cond_code == NE
- || cond_code == GE || cond_code == LE
- || cond_code == GEU || cond_code == LEU
- || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
+ if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
+ && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
+ ;
+ else if (cond_code == NE
+ || cond_code == GE || cond_code == LE
+ || cond_code == GEU || cond_code == LEU
+ || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
{
rtx not_result = gen_reg_rtx (CCEQmode);
rtx not_op, rev_cond_rtx;
condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
}
- op_mode = GET_MODE (XEXP (operands[1], 0));
+ machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
if (op_mode == VOIDmode)
op_mode = GET_MODE (XEXP (operands[1], 1));
if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
{
PUT_MODE (condition_rtx, DImode);
- convert_move (result, condition_rtx, 0);
+ convert_move (operands[0], condition_rtx, 0);
}
else
{
PUT_MODE (condition_rtx, SImode);
- emit_insn (gen_rtx_SET (result, condition_rtx));
+ emit_insn (gen_rtx_SET (operands[0], condition_rtx));
}
}
void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
- rtx condition_rtx, loc_ref;
-
- condition_rtx = rs6000_generate_compare (operands[0], mode);
- loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
- emit_jump_insn (gen_rtx_SET (pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
- loc_ref, pc_rtx)));
+ rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
+ rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
;
- else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
+ /* Only when NaNs and signed zeros are not honored can smax be used
+ for `op0 < op1 ? op1 : op0`, and smin for `op0 > op1 ? op1 : op0`. */
+ else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
+ && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
max_p = !max_p;
else
/* At this point we know we can use fsel. */
+ /* Don't allow compare_mode other than SFmode or DFmode; for other modes
+ there is no fsel instruction. */
+ if (compare_mode != SFmode && compare_mode != DFmode)
+ return 0;
+
/* Reduce the comparison to a comparison against zero. */
if (! is_against_zero)
{
return insn;
}
+/* Move instruction at POS to the end of the READY list. */
+
+static void
+move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
+{
+ rtx_insn *tmp;
+ int i;
+
+ tmp = ready[pos];
+ for (i = pos; i < lastpos; i++)
+ ready[i] = ready[i + 1];
+ ready[lastpos] = tmp;
+}
+
+/* Do Power6 specific sched_reorder2 reordering of ready list. */
+
+static int
+power6_sched_reorder2 (rtx_insn **ready, int lastpos)
+{
+ /* For Power6, we need to handle some special cases to try and keep the
+ store queue from overflowing and triggering expensive flushes.
+
+ This code monitors how load and store instructions are being issued
+ and skews the ready list one way or the other to increase the likelihood
+ that a desired instruction is issued at the proper time.
+
+ A couple of things are done. First, we maintain a "load_store_pendulum"
+ to track the current state of load/store issue.
+
+ - If the pendulum is at zero, then no loads or stores have been
+ issued in the current cycle so we do nothing.
+
+ - If the pendulum is 1, then a single load has been issued in this
+ cycle and we attempt to locate another load in the ready list to
+ issue with it.
+
+ - If the pendulum is -2, then two stores have already been
+ issued in this cycle, so we increase the priority of the first load
+ in the ready list to increase its likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum is -1, then a single store has been issued in this
+ cycle and we attempt to locate another store in the ready list to
+ issue with it, preferring a store to an adjacent memory location to
+ facilitate store pairing in the store queue.
+
+ - If the pendulum is 2, then two loads have already been
+ issued in this cycle, so we increase the priority of the first store
+ in the ready list to increase its likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum < -2 or > 2, then do nothing.
+
+ Note: This code covers the most common scenarios. There exist
+ non-load/store instructions which make use of the LSU and which
+ would need to be accounted for to strictly model the behavior
+ of the machine. Those instructions are currently unaccounted
+ for to help minimize compile time overhead of this code.
+ */
+ int pos;
+ rtx load_mem, str_mem;
+
+ if (is_store_insn (last_scheduled_insn, &str_mem))
+ /* Issuing a store, swing the load_store_pendulum to the left */
+ load_store_pendulum--;
+ else if (is_load_insn (last_scheduled_insn, &load_mem))
+ /* Issuing a load, swing the load_store_pendulum to the right */
+ load_store_pendulum++;
+ else
+ return cached_can_issue_more;
+
+ /* If the pendulum is balanced, or there is only one instruction on
+ the ready list, then all is well, so return. */
+ if ((load_store_pendulum == 0) || (lastpos <= 0))
+ return cached_can_issue_more;
+
+ if (load_store_pendulum == 1)
+ {
+ /* A load has been issued in this cycle. Scan the ready list
+ for another load to issue with it */
+ pos = lastpos;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos], &load_mem))
+ {
+ /* Found a load. Move it to the head of the ready list,
+ and adjust its priority so that it is more likely to
+ stay there. */
+ move_to_end_of_ready (ready, pos, lastpos);
+
+ if (!sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[lastpos]))
+ INSN_PRIORITY (ready[lastpos])++;
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -2)
+ {
+ /* Two stores have been issued in this cycle. Increase the
+ priority of the first load in the ready list to favor it for
+ issuing in the next cycle. */
+ pos = lastpos;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos], &load_mem)
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a load
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple loads */
+ load_store_pendulum--;
+
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -1)
+ {
+ /* A store has been issued in this cycle. Scan the ready list for
+ another store to issue with it, preferring a store to an adjacent
+ memory location */
+ int first_store_pos = -1;
+
+ pos = lastpos;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos], &str_mem))
+ {
+ rtx str_mem2;
+ /* Maintain the index of the first store found on the
+ list */
+ if (first_store_pos == -1)
+ first_store_pos = pos;
+
+ if (is_store_insn (last_scheduled_insn, &str_mem2)
+ && adjacent_mem_locations (str_mem, str_mem2))
+ {
+ /* Found an adjacent store. Move it to the head of the
+ ready list, and adjust its priority so that it is
+ more likely to stay there. */
+ move_to_end_of_ready (ready, pos, lastpos);
+
+ if (!sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[lastpos]))
+ INSN_PRIORITY (ready[lastpos])++;
+
+ first_store_pos = -1;
+
+ break;
+ }
+ }
+ pos--;
+ }
+
+ if (first_store_pos >= 0)
+ {
+ /* An adjacent store wasn't found, but a non-adjacent store was,
+ so move the non-adjacent store to the front of the ready
+ list, and adjust its priority so that it is more likely to
+ stay there. */
+ move_to_end_of_ready (ready, first_store_pos, lastpos);
+ if (!sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[lastpos]))
+ INSN_PRIORITY (ready[lastpos])++;
+ }
+ }
+ else if (load_store_pendulum == 2)
+ {
+ /* Two loads have been issued in this cycle. Increase the priority
+ of the first store in the ready list to favor it for issuing in
+ the next cycle. */
+ pos = lastpos;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos], &str_mem)
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a store
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple stores */
+ load_store_pendulum++;
+
+ break;
+ }
+ pos--;
+ }
+ }
+
+ return cached_can_issue_more;
+}
+
/* Do Power9 specific sched_reorder2 reordering of ready list. */
static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
int pos;
- int i;
- rtx_insn *tmp;
enum attr_type type, type2;
type = get_attr_type (last_scheduled_insn);
if (recog_memoized (ready[pos]) >= 0
&& get_attr_type (ready[pos]) == TYPE_DIV)
{
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
break;
}
pos--;
{
/* Found a vector insn to pair with, move it to the
end of the ready list so it is scheduled next. */
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Didn't find a vector to pair with but did find a vecload,
move it to the end of the ready list. */
- tmp = ready[vecload_pos];
- for (i = vecload_pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, vecload_pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Found a vecload insn to pair with, move it to the
end of the ready list so it is scheduled next. */
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Didn't find a vecload to pair with but did find a vector
insn, move it to the end of the ready list. */
- tmp = ready[vec_pos];
- for (i = vec_pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, vec_pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
if (sched_verbose)
fprintf (dump, "// rs6000_sched_reorder2 :\n");
- /* For Power6, we need to handle some special cases to try and keep the
- store queue from overflowing and triggering expensive flushes.
-
- This code monitors how load and store instructions are being issued
- and skews the ready list one way or the other to increase the likelihood
- that a desired instruction is issued at the proper time.
-
- A couple of things are done. First, we maintain a "load_store_pendulum"
- to track the current state of load/store issue.
-
- - If the pendulum is at zero, then no loads or stores have been
- issued in the current cycle so we do nothing.
-
- - If the pendulum is 1, then a single load has been issued in this
- cycle and we attempt to locate another load in the ready list to
- issue with it.
-
- - If the pendulum is -2, then two stores have already been
- issued in this cycle, so we increase the priority of the first load
- in the ready list to increase it's likelihood of being chosen first
- in the next cycle.
-
- - If the pendulum is -1, then a single store has been issued in this
- cycle and we attempt to locate another store in the ready list to
- issue with it, preferring a store to an adjacent memory location to
- facilitate store pairing in the store queue.
-
- - If the pendulum is 2, then two loads have already been
- issued in this cycle, so we increase the priority of the first store
- in the ready list to increase it's likelihood of being chosen first
- in the next cycle.
-
- - If the pendulum < -2 or > 2, then do nothing.
-
- Note: This code covers the most common scenarios. There exist non
- load/store instructions which make use of the LSU and which
- would need to be accounted for to strictly model the behavior
- of the machine. Those instructions are currently unaccounted
- for to help minimize compile time overhead of this code.
- */
+ /* Do Power6 dependent reordering if necessary. */
if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
- {
- int pos;
- int i;
- rtx_insn *tmp;
- rtx load_mem, str_mem;
-
- if (is_store_insn (last_scheduled_insn, &str_mem))
- /* Issuing a store, swing the load_store_pendulum to the left */
- load_store_pendulum--;
- else if (is_load_insn (last_scheduled_insn, &load_mem))
- /* Issuing a load, swing the load_store_pendulum to the right */
- load_store_pendulum++;
- else
- return cached_can_issue_more;
-
- /* If the pendulum is balanced, or there is only one instruction on
- the ready list, then all is well, so return. */
- if ((load_store_pendulum == 0) || (*pn_ready <= 1))
- return cached_can_issue_more;
-
- if (load_store_pendulum == 1)
- {
- /* A load has been issued in this cycle. Scan the ready list
- for another load to issue with it */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_load_insn (ready[pos], &load_mem))
- {
- /* Found a load. Move it to the head of the ready list,
- and adjust it's priority so that it is more likely to
- stay there */
- tmp = ready[pos];
- for (i=pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
-
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
- break;
- }
- pos--;
- }
- }
- else if (load_store_pendulum == -2)
- {
- /* Two stores have been issued in this cycle. Increase the
- priority of the first load in the ready list to favor it for
- issuing in the next cycle. */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_load_insn (ready[pos], &load_mem)
- && !sel_sched_p ()
- && INSN_PRIORITY_KNOWN (ready[pos]))
- {
- INSN_PRIORITY (ready[pos])++;
-
- /* Adjust the pendulum to account for the fact that a load
- was found and increased in priority. This is to prevent
- increasing the priority of multiple loads */
- load_store_pendulum--;
-
- break;
- }
- pos--;
- }
- }
- else if (load_store_pendulum == -1)
- {
- /* A store has been issued in this cycle. Scan the ready list for
- another store to issue with it, preferring a store to an adjacent
- memory location */
- int first_store_pos = -1;
-
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_store_insn (ready[pos], &str_mem))
- {
- rtx str_mem2;
- /* Maintain the index of the first store found on the
- list */
- if (first_store_pos == -1)
- first_store_pos = pos;
-
- if (is_store_insn (last_scheduled_insn, &str_mem2)
- && adjacent_mem_locations (str_mem, str_mem2))
- {
- /* Found an adjacent store. Move it to the head of the
- ready list, and adjust it's priority so that it is
- more likely to stay there */
- tmp = ready[pos];
- for (i=pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
-
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
-
- first_store_pos = -1;
-
- break;
- };
- }
- pos--;
- }
-
- if (first_store_pos >= 0)
- {
- /* An adjacent store wasn't found, but a non-adjacent store was,
- so move the non-adjacent store to the front of the ready
- list, and adjust its priority so that it is more likely to
- stay there. */
- tmp = ready[first_store_pos];
- for (i=first_store_pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
- }
- }
- else if (load_store_pendulum == 2)
- {
- /* Two loads have been issued in this cycle. Increase the priority
- of the first store in the ready list to favor it for issuing in
- the next cycle. */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_store_insn (ready[pos], &str_mem)
- && !sel_sched_p ()
- && INSN_PRIORITY_KNOWN (ready[pos]))
- {
- INSN_PRIORITY (ready[pos])++;
-
- /* Adjust the pendulum to account for the fact that a store
- was found and increased in priority. This is to prevent
- increasing the priority of multiple stores */
- load_store_pendulum++;
-
- break;
- }
- pos--;
- }
- }
- }
+ return power6_sched_reorder2 (ready, *pn_ready - 1);
/* Do Power9 dependent reordering if necessary. */
if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
if (rs6000_pcrel_p (cfun))
{
rtx reg = gen_rtx_REG (Pmode, regno);
- rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
- UNSPEC_PLT_PCREL);
+ rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (3, base, call_ref, arg),
+ UNSPECV_PLT_PCREL);
emit_insn (gen_rtx_SET (reg, u));
return reg;
}
rtx reg = gen_rtx_REG (Pmode, regno);
rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
UNSPEC_PLT16_HA);
- rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
- UNSPEC_PLT16_LO);
+ rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (3, reg, call_ref, arg),
+ UNSPECV_PLT16_LO);
emit_insn (gen_rtx_SET (reg, hi));
emit_insn (gen_rtx_SET (reg, lo));
return reg;
{ "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
- { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
+ { "prefixed", OPTION_MASK_PREFIXED, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
{ "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
if ((flags & mask) == 0)
{
no_str = "no-";
- len += sizeof ("no-") - 1;
+ len += strlen ("no-");
}
flags &= ~mask;
if ((flags & mask) != 0)
{
no_str = "no-";
- len += sizeof ("no-") - 1;
+ len += strlen ("no-");
}
flags |= mask;
fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
comma = ", ";
- comma_len = sizeof (", ") - 1;
+ comma_len = strlen (", ");
}
fputs ("\n", file);
{ OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
{ OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
{ OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
+ { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
};
for (i = 0; i < ARRAY_SIZE (flags); i++)
DECL_INITIAL (decl) = make_node (BLOCK);
DECL_STATIC_CONSTRUCTOR (decl) = 0;
+ if (DECL_COMDAT_GROUP (default_decl)
+ || TREE_PUBLIC (default_decl))
+ {
+ /* In this case, each translation unit with a call to this
+ versioned function will put out a resolver. Ensure it
+ is comdat to keep just one copy. */
+ DECL_COMDAT (decl) = 1;
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+ }
+ else
+ TREE_PUBLIC (dispatch_decl) = 0;
+
/* Build result decl and add to function_decl. */
tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
DECL_CONTEXT (t) = decl;
rtx toc_restore = NULL_RTX;
rtx func_addr;
rtx abi_reg = NULL_RTX;
- rtx call[4];
+ rtx call[5];
int n_call;
rtx insn;
bool is_pltseq_longcall;
call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
if (value != NULL_RTX)
call[0] = gen_rtx_SET (value, call[0]);
- n_call = 1;
+ call[1] = gen_rtx_USE (VOIDmode, cookie);
+ n_call = 2;
if (toc_load)
call[n_call++] = toc_load;
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
return INSN_FORM_UPDATE;
- /* Handle PC-relative symbols and labels. Check for both local and external
- symbols. Assume labels are always local. */
+ /* Handle PC-relative symbols and labels. Check for both local and
+ external symbols. Assume labels are always local. TLS symbols
+ are not PC-relative for rs6000. */
if (TARGET_PCREL)
{
- if (SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
- return INSN_FORM_PCREL_EXTERNAL;
-
- if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
+ if (LABEL_REF_P (addr))
return INSN_FORM_PCREL_LOCAL;
+
+ if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
+ {
+ if (!SYMBOL_REF_LOCAL_P (addr))
+ return INSN_FORM_PCREL_EXTERNAL;
+ else
+ return INSN_FORM_PCREL_LOCAL;
+ }
}
if (GET_CODE (addr) == CONST)
return INSN_FORM_BAD;
HOST_WIDE_INT offset = INTVAL (op1);
- if (!SIGNED_34BIT_OFFSET_P (offset))
+ if (!SIGNED_INTEGER_34BIT_P (offset))
return INSN_FORM_BAD;
/* Check for local and external PC-relative addresses. Labels are always
- local. */
+ local. TLS symbols are not PC-relative for rs6000. */
if (TARGET_PCREL)
{
- if (SYMBOL_REF_P (op0) && !SYMBOL_REF_LOCAL_P (op0))
- return INSN_FORM_PCREL_EXTERNAL;
-
- if (SYMBOL_REF_P (op0) || LABEL_REF_P (op0))
+ if (LABEL_REF_P (op0))
return INSN_FORM_PCREL_LOCAL;
+
+ if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
+ {
+ if (!SYMBOL_REF_LOCAL_P (op0))
+ return INSN_FORM_PCREL_EXTERNAL;
+ else
+ return INSN_FORM_PCREL_LOCAL;
+ }
}
/* If it isn't PC-relative, the address must use a base register. */
return INSN_FORM_BAD;
/* Large offsets must be prefixed. */
- if (!SIGNED_16BIT_OFFSET_P (offset))
+ if (!SIGNED_INTEGER_16BIT_P (offset))
{
- if (TARGET_PREFIXED_ADDR)
+ if (TARGET_PREFIXED)
return INSN_FORM_PREFIXED_NUMERIC;
return INSN_FORM_BAD;
if ((offset & 3) == 0)
return INSN_FORM_DS;
- else if (TARGET_PREFIXED_ADDR)
+ else if (TARGET_PREFIXED)
return INSN_FORM_PREFIXED_NUMERIC;
else
if ((offset & 15) == 0)
return INSN_FORM_DQ;
- else if (TARGET_PREFIXED_ADDR)
+ else if (TARGET_PREFIXED)
return INSN_FORM_PREFIXED_NUMERIC;
else
return INSN_FORM_BAD;
}
+/* Helper function to see if we're potentially looking at lfs/stfs.
+ - PARALLEL containing a SET and a CLOBBER
+ - stfs:
+ - SET is from UNSPEC_SI_FROM_SF to MEM:SI
+ - CLOBBER is a V4SF
+ - lfs:
+ - SET is from UNSPEC_SF_FROM_SI to REG:SF
+ - CLOBBER is a DI
+ */
+
+static bool
+is_lfs_stfs_insn (rtx_insn *insn)
+{
+ rtx pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL)
+ return false;
+
+ /* This should be a parallel with exactly one set and one clobber. */
+ if (XVECLEN (pattern, 0) != 2)
+ return false;
+
+ rtx set = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (set) != SET)
+ return false;
+
+ rtx clobber = XVECEXP (pattern, 0, 1);
+ if (GET_CODE (clobber) != CLOBBER)
+ return false;
+
+ /* All we care about is that the destination of the SET is a mem:SI,
+ the source should be an UNSPEC_SI_FROM_SF, and the clobber
+ should be a scratch:V4SF. */
+
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ rtx scratch = SET_DEST (clobber);
+
+ if (GET_CODE (src) != UNSPEC)
+ return false;
+
+ /* stfs case. */
+ if (XINT (src, 1) == UNSPEC_SI_FROM_SF
+ && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
+ return true;
+
+ /* lfs case. */
+ if (XINT (src, 1) == UNSPEC_SF_FROM_SI
+ && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
+ return true;
+
+ return false;
+}
+
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
instruction format (D/DS/DQ) used for offset memory. */
unsigned size = GET_MODE_SIZE (mode);
/* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
- 128-bit floating point, and 128-bit integers. */
+ 128-bit floating point, and 128-bit integers. Before power9, only indexed
+ addressing was available for vectors. */
if (FP_REGNO_P (r))
{
if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
&& (VECTOR_MODE_P (mode)
|| FLOAT128_VECTOR_P (mode)
|| mode == TImode || mode == CTImode))
- return NON_PREFIXED_DQ;
+ return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
else
return NON_PREFIXED_DEFAULT;
}
/* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
- 128-bit floating point, and 128-bit integers. */
+ 128-bit floating point, and 128-bit integers. Before power9, only indexed
+ addressing was available. */
else if (ALTIVEC_REGNO_P (r))
{
+ if (!TARGET_P9_VECTOR)
+ return NON_PREFIXED_X;
+
if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
return NON_PREFIXED_DS;
else
non_prefixed = reg_to_non_prefixed (reg, mem_mode);
- return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
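+ /* Need to make sure we aren't looking at an lfs, which doesn't look like
+ the other things reg_to_non_prefixed/address_is_prefixed look for. */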
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
+ else
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}
/* Whether a store instruction is a prefixed instruction. This is called from
return false;
machine_mode mem_mode = GET_MODE (mem);
+ rtx addr = XEXP (mem, 0);
enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
- return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
+
+ /* Need to make sure we aren't looking at a stfs which doesn't look
+ like the other things reg_to_non_prefixed/address_is_prefixed
+ looks for. */
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+ return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
+ else
+ return address_is_prefixed (addr, mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction. This
int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
- if (TARGET_PREFIXED_ADDR && NONJUMP_INSN_P (insn))
+ if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
{
rtx pattern = PATTERN (insn);
if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
tree fenv_var = create_tmp_var_raw (double_type_node);
TREE_ADDRESSABLE (fenv_var) = 1;
- tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
+ tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
+ build4 (TARGET_EXPR, double_type_node, fenv_var,
+ void_node, NULL_TREE, NULL_TREE));
*hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
*clear = build_call_expr (atomic_clear_decl, 0);
/* Mask to clear everything except for the rounding modes and non-IEEE
arithmetic flag. */
- const unsigned HOST_WIDE_INT hold_exception_mask =
- HOST_WIDE_INT_C (0xffffffff00000007);
+ const unsigned HOST_WIDE_INT hold_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff00000007);
tree fenv_var = create_tmp_var_raw (double_type_node);
- tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
+ tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
+ NULL_TREE, NULL_TREE);
tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
/* Mask to clear everything except for the rounding modes and non-IEEE
arithmetic flag. */
- const unsigned HOST_WIDE_INT clear_exception_mask =
- HOST_WIDE_INT_C (0xffffffff00000000);
+ const unsigned HOST_WIDE_INT clear_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff00000000);
tree fenv_clear = create_tmp_var_raw (double_type_node);
- tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
+ tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
+ call_mffs, NULL_TREE, NULL_TREE);
tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
(*(uint64_t*)fenv_var & 0x1ff80fff);
__builtin_mtfsf (0xff, fenv_update); */
- const unsigned HOST_WIDE_INT update_exception_mask =
- HOST_WIDE_INT_C (0xffffffff1fffff00);
- const unsigned HOST_WIDE_INT new_exception_mask =
- HOST_WIDE_INT_C (0x1ff80fff);
+ const unsigned HOST_WIDE_INT update_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff1fffff00);
+ const unsigned HOST_WIDE_INT new_exception_mask
+ = HOST_WIDE_INT_C (0x1ff80fff);
tree old_fenv = create_tmp_var_raw (double_type_node);
- tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
+ tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
+ call_mffs, NULL_TREE, NULL_TREE);
tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
return true;
}
+/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
+
+static bool
+rs6000_cannot_substitute_mem_equiv_p (rtx mem)
+{
+ gcc_assert (MEM_P (mem));
+
+ /* curr_insn_transform()'s handling of subregs cannot handle Altivec
+ AND-style addresses, so don't allow MEMs with those address types to be
+ substituted as an equivalent expression. See PR93974 for details. */
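+ /* Such an address looks like (and (reg) (const_int -16)), i.e. the
+ address with its low four bits masked off. */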
+ if (GET_CODE (XEXP (mem, 0)) == AND)
+ return true;
+
+ return false;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"