/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2019 Free Software Foundation, Inc.
+ Copyright (C) 1991-2020 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
-#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
#endif
#endif
-/* Support targetm.vectorize.builtin_mask_for_load. */
-GTY(()) tree altivec_builtin_mask_for_load;
+/* Don't enable PC-relative addressing if the target does not support it. */
+#ifndef PCREL_SUPPORTED_BY_OS
+#define PCREL_SUPPORTED_BY_OS 0
+#endif
-/* Set to nonzero once AIX common-mode calls have been defined. */
-static GTY(()) int common_mode_defined;
+/* Support targetm.vectorize.builtin_mask_for_load. */
+tree altivec_builtin_mask_for_load;
#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup. */
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
-extern GTY(()) section *toc_section;
section *toc_section = 0;
/* Describe the vector unit used for modes. */
int rs6000_vector_align[NUM_MACHINE_MODES];
/* Map selected modes to types for builtins. */
-GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
+tree builtin_mode_to_type[MAX_MACHINE_MODE][2];
/* What modes to automatically generate reciprocal divide estimate (fre) and
reciprocal sqrt (frsqrte) for. */
machine_mode,
secondary_reload_info *,
bool);
+static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
+#undef TARGET_HAVE_COUNT_REG_DECR_P
+#define TARGET_HAVE_COUNT_REG_DECR_P true
+
+/* 1000000000 is infinite cost in IVOPTs. */
+#undef TARGET_DOLOOP_COST_FOR_GENERIC
+#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
+
+#undef TARGET_DOLOOP_COST_FOR_ADDRESS
+#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
+
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
+
+#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
+#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
+ rs6000_cannot_substitute_mem_equiv_p
\f
/* Processor table. */
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
static bool
-rs6000_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
- unsigned int regno, machine_mode mode)
+rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
+ machine_mode mode)
{
if (TARGET_32BIT
&& TARGET_POWERPC64
spaces = 0;
}
else
- spaces += sizeof (" Reload=sl") - 1;
+ spaces += strlen (" Reload=sl");
if (reg_addr[m].scalar_in_vmx_p)
{
spaces = 0;
}
else
- spaces += sizeof (" Upper=y") - 1;
+ spaces += strlen (" Upper=y");
if (rs6000_vector_unit[m] != VECTOR_NONE
|| rs6000_vector_mem[m] != VECTOR_NONE)
&& TARGET_HARD_FLOAT
&& !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
| ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
- | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
+ | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0)
+ | ((TARGET_FUTURE) ? RS6000_BTM_FUTURE : 0));
}
/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
}
+ if (!TARGET_FPRND && TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
+ /* TARGET_VSX = 1 implies Power 7 and newer */
+ error ("%qs requires %qs", "-mvsx", "-mfprnd");
+ rs6000_isa_flags &= ~OPTION_MASK_FPRND;
+ }
+
if (TARGET_DIRECT_MOVE && !TARGET_VSX)
{
if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
}
/* Enable the default support for IEEE 128-bit floating point on Linux VSX
- sytems. In GCC 7, we would enable the the IEEE 128-bit floating point
+ systems. In GCC 7, we would enable the IEEE 128-bit floating point
infrastructure (-mfloat128-type) but not enable the actual __float128 type
unless the user used the explicit -mfloat128. In GCC 8, we enable both
the keyword as well as the type. */
if (!TARGET_VSX)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
- error ("%qs requires VSX support", "%<-mfloat128%>");
+ error ("%qs requires VSX support", "-mfloat128");
TARGET_FLOAT128_TYPE = 0;
rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
}
- /* -mprefixed-addr (and hence -mpcrel) requires -mcpu=future. */
- if (TARGET_PREFIXED_ADDR && !TARGET_FUTURE)
+ /* Enable -mprefixed by default on 'future' systems. */
+ if (TARGET_FUTURE && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
+ rs6000_isa_flags |= OPTION_MASK_PREFIXED;
+
+ /* -mprefixed requires -mcpu=future. */
+ else if (TARGET_PREFIXED && !TARGET_FUTURE)
{
- if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
- error ("%qs requires %qs", "-mpcrel", "-mcpu=future");
- else if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED_ADDR) != 0)
- error ("%qs requires %qs", "-mprefixed-addr", "-mcpu=future");
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
+ error ("%qs requires %qs", "-mprefixed", "-mcpu=future");
- rs6000_isa_flags &= ~(OPTION_MASK_PCREL | OPTION_MASK_PREFIXED_ADDR);
+ rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
}
/* -mpcrel requires prefixed load/store addressing. */
- if (TARGET_PCREL && !TARGET_PREFIXED_ADDR)
+ if (TARGET_PCREL && !TARGET_PREFIXED)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
- error ("%qs requires %qs", "-mpcrel", "-mprefixed-addr");
+ error ("%qs requires %qs", "-mpcrel", "-mprefixed");
rs6000_isa_flags &= ~OPTION_MASK_PCREL;
}
SUB3TARGET_OVERRIDE_OPTIONS;
#endif
+ /* If the ABI has support for PC-relative relocations, enable it by default.
+ This test depends on the sub-target tests above setting the code model to
+ medium for ELF v2 systems. */
+ if (PCREL_SUPPORTED_BY_OS
+ && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
+ rs6000_isa_flags |= OPTION_MASK_PCREL;
+
/* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
after the subtarget override options are done. */
- if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
+ else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
{
if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
str_align_loops = "16";
}
}
-
- if (flag_align_jumps && !str_align_jumps)
- str_align_jumps = "16";
- if (flag_align_loops && !str_align_loops)
- str_align_loops = "16";
}
/* Arrange to save and restore machine status around nested functions. */
if (global_init_p)
{
- maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
- rs6000_cost->simultaneous_prefetches,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
- rs6000_cost->cache_line_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_simultaneous_prefetches,
+ rs6000_cost->simultaneous_prefetches);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l1_cache_size,
+ rs6000_cost->l1_cache_size);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l1_cache_line_size,
+ rs6000_cost->cache_line_size);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_l2_cache_size,
+ rs6000_cost->l2_cache_size);
/* Increase loop peeling limits based on performance analysis. */
- maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
- global_options.x_param_values,
- global_options_set.x_param_values);
- maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_max_peeled_insns, 400);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_max_completely_peeled_insns, 400);
/* Use the 'model' -fsched-pressure algorithm by default. */
- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
- SCHED_PRESSURE_MODEL,
- global_options.x_param_values,
- global_options_set.x_param_values);
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_sched_pressure_algorithm,
+ SCHED_PRESSURE_MODEL);
+
+ /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
+ turns -frename-registers on. */
+ if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+ || (global_options_set.x_flag_unroll_all_loops
+ && flag_unroll_all_loops))
+ {
+ if (!global_options_set.x_unroll_only_small_loops)
+ unroll_only_small_loops = 0;
+ if (!global_options_set.x_flag_rename_registers)
+ flag_rename_registers = 1;
+ }
/* If using typedef char *va_list, signal that
__builtin_va_start (&ap, 0) can be optimized to
switch (type_of_cost)
{
case scalar_stmt:
- case scalar_load:
case scalar_store:
case vector_stmt:
- case vector_load:
case vector_store:
case vec_to_scalar:
case scalar_to_vec:
case cond_branch_not_taken:
return 1;
+ case scalar_load:
+ case vector_load:
+ /* Like rs6000_insn_cost, make load insns cost a bit more. */
+ return 2;
case vec_perm:
- if (TARGET_VSX)
+ /* Power7 has only one permute unit, make it a bit expensive. */
+ if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
return 3;
else
return 1;
case vec_promote_demote:
- if (TARGET_VSX)
- return 4;
- else
- return 1;
+ /* Power7 has only one permute/pack unit, make it a bit expensive. */
+ if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
+ return 4;
+ else
+ return 1;
case cond_branch_taken:
return 3;
case unaligned_load:
case vector_gather_load:
+ /* Like rs6000_insn_cost, make load insns cost a bit more. */
if (TARGET_EFFICIENT_UNALIGNED_VSX)
- return 1;
-
- if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
- {
- elements = TYPE_VECTOR_SUBPARTS (vectype);
- if (elements == 2)
- /* Double word aligned. */
- return 2;
-
- if (elements == 4)
- {
- switch (misalign)
- {
- case 8:
- /* Double word aligned. */
- return 2;
+ return 2;
- case -1:
- /* Unknown misalignment. */
- case 4:
- case 12:
- /* Word aligned. */
- return 22;
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 4;
- default:
- gcc_unreachable ();
- }
- }
- }
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 4;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 33;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
- if (TARGET_ALTIVEC)
- /* Misaligned loads are not supported. */
- gcc_unreachable ();
+ if (TARGET_ALTIVEC)
+ /* Misaligned loads are not supported. */
+ gcc_unreachable ();
- return 2;
+ /* Like rs6000_insn_cost, make load insns cost a bit more. */
+ return 4;
case unaligned_store:
case vector_scatter_store:
static machine_mode
rs6000_preferred_simd_mode (scalar_mode mode)
{
- if (TARGET_VSX)
- switch (mode)
- {
- case E_DFmode:
- return V2DFmode;
- default:;
- }
- if (TARGET_ALTIVEC || TARGET_VSX)
- switch (mode)
- {
- case E_SFmode:
- return V4SFmode;
- case E_TImode:
- return V1TImode;
- case E_DImode:
- return V2DImode;
- case E_SImode:
- return V4SImode;
- case E_HImode:
- return V8HImode;
- case E_QImode:
- return V16QImode;
- default:;
- }
+ opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
+
+ if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
+ return vmode.require ();
+
return word_mode;
}
return data;
}
+/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
+   For some statement, we would like to further fine-grain tweak the cost on
+   top of rs6000_builtin_vectorization_cost handling which doesn't have any
+   information on statement operation codes etc.  One typical case here is
+   COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
+   for scalar cost, but it should be priced more whatever transformed to either
+   compare + branch or compare + isel instructions.  */
+
+static unsigned
+adjust_vectorization_cost (enum vect_cost_for_stmt kind,
+			   struct _stmt_vec_info *stmt_info)
+{
+  /* Only scalar GIMPLE assignments get an extra charge; everything else
+     keeps the base cost from rs6000_builtin_vectorization_cost.  */
+  if (kind == scalar_stmt && stmt_info && stmt_info->stmt
+      && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
+    {
+      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
+      /* A scalar COND_EXPR becomes compare + branch or compare + isel, so
+	 the caller adds this 2 on top of the base statement cost.  */
+      if (subcode == COND_EXPR)
+	return 2;
+    }
+
+  return 0;
+}
+
/* Implement targetm.vectorize.add_stmt_cost. */
static unsigned
-rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
- struct _stmt_vec_info *stmt_info, int misalign,
- enum vect_cost_model_location where)
+rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
+ enum vect_cost_for_stmt kind,
+ struct _stmt_vec_info *stmt_info, tree vectype,
+ int misalign, enum vect_cost_model_location where)
{
rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
unsigned retval = 0;
if (flag_vect_cost_model)
{
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
misalign);
+ stmt_cost += adjust_vectorization_cost (kind, stmt_info);
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ if (where == vect_body && stmt_info
+ && stmt_in_inner_loop_p (vinfo, stmt_info))
count *= 50; /* FIXME. */
retval = (unsigned) (count * stmt_cost);
free (data);
}
+/* Implement targetm.loop_unroll_adjust.  NUNROLL is the unroll factor
+   proposed by the generic unroller for LOOP; return the factor to actually
+   use (0 presumably suppresses unrolling -- confirm against the hook's
+   contract in the GCC internals manual).  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
+{
+  /* With -munroll-only-small-loops, cap small loops at a factor of 2 and
+     return 0 for anything larger.  */
+  if (unroll_only_small_loops)
+    {
+      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+	 example we may want to unroll very small loops more times (4 perhaps).
+	 We also should use a PARAM for this.  */
+      if (loop->ninsns <= 10)
+	return MIN (2, nunroll);
+      else
+	return 0;
+    }
+
+  return nunroll;
+}
+
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics. */
if (!bname)
return NULL_TREE;
- strcpy (name, bname + sizeof ("__builtin_") - 1);
+ strcpy (name, bname + strlen ("__builtin_"));
strcat (name, suffix);
if (n_args == 1)
num_insns_constant_gpr (HOST_WIDE_INT value)
{
/* signed constant loadable with addi */
- if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
+ if (SIGNED_INTEGER_16BIT_P (value))
return 1;
/* constant loadable with addis */
&& (value >> 31 == -1 || value >> 31 == 0))
return 1;
+ /* PADDI can support up to 34 bit signed integers. */
+ else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
+ return 1;
+
else if (TARGET_POWERPC64)
{
HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
&& rs6000_is_valid_and_mask (GEN_INT (low), DImode))
insns = 2;
total += insns;
- value >>= BITS_PER_WORD;
+ /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
+ it all at once would be UB. */
+ value >>= (BITS_PER_WORD - 1);
+ value >>= 1;
}
return total;
}
}
}
+/* Return the offset within a memory object (MEM) of a vector type to a given
+   element within the vector (ELEMENT) with an element size (SCALAR_SIZE).  If
+   the element is constant, we return a constant integer.
+
+   Otherwise, we use a base register temporary to calculate the offset after
+   masking it to fit within the bounds of the vector and scaling it.  The
+   masking is required by the 64-bit ELF version 2 ABI for the vec_extract
+   built-in function.  */
+
+static rtx
+get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
+{
+  if (CONST_INT_P (element))
+    return GEN_INT (INTVAL (element) * scalar_size);
+
+  /* All insns should use the 'Q' constraint (address is a single register) if
+     the element number is not a constant.  */
+  gcc_assert (satisfies_constraint_Q (mem));
+
+  /* Mask the element to make sure the element number is between 0 and the
+     maximum number of elements - 1 so that we don't generate an address
+     outside the vector.  */
+  rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
+  rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
+  emit_insn (gen_rtx_SET (base_tmp, and_op));
+
+  /* Shift the element to get the byte offset from the element number.  */
+  int shift = exact_log2 (scalar_size);
+  /* SCALAR_SIZE must be a power of two for the shift to be exact.  */
+  gcc_assert (shift >= 0);
+
+  if (shift > 0)
+    {
+      rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
+      emit_insn (gen_rtx_SET (base_tmp, shift_op));
+    }
+
+  /* BASE_TMP now holds (ELEMENT & (nunits - 1)) << log2 (SCALAR_SIZE).  */
+  return base_tmp;
+}
+
+/* Helper function to update PC-relative addresses when we are adjusting a
+   memory address (ADDR) to a vector to point to a scalar field within the
+   vector with a constant offset (ELEMENT_OFFSET).  If the address is not
+   valid, we can use the base register temporary (BASE_TMP) to form the
+   address.  */
+
+static rtx
+adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
+{
+  rtx new_addr = NULL;
+
+  /* Only constant element offsets are handled here; the caller takes a
+     different path for variable offsets.  */
+  gcc_assert (CONST_INT_P (element_offset));
+
+  /* Strip an outer CONST wrapper to get at the underlying address form.  */
+  if (GET_CODE (addr) == CONST)
+    addr = XEXP (addr, 0);
+
+  if (GET_CODE (addr) == PLUS)
+    {
+      rtx op0 = XEXP (addr, 0);
+      rtx op1 = XEXP (addr, 1);
+
+      if (CONST_INT_P (op1))
+	{
+	  /* Fold the element offset into the existing constant offset.  */
+	  HOST_WIDE_INT offset
+	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
+
+	  if (offset == 0)
+	    new_addr = op0;
+
+	  else
+	    {
+	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
+	      new_addr = gen_rtx_CONST (Pmode, plus);
+	    }
+	}
+
+      else
+	{
+	  /* Non-constant second operand: materialize the whole address in
+	     BASE_TMP, then add the element offset to the register.  */
+	  emit_move_insn (base_tmp, addr);
+	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
+	}
+    }
+
+  else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
+    {
+      rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
+      new_addr = gen_rtx_CONST (Pmode, plus);
+    }
+
+  else
+    /* No other address forms are expected for PC-relative addresses.  */
+    gcc_unreachable ();
+
+  return new_addr;
+}
+
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
temporary (BASE_TMP) to fixup the address. Return the new memory address
- that is valid for reads or writes to a given register (SCALAR_REG). */
+ that is valid for reads or writes to a given register (SCALAR_REG).
+
+ This function is expected to be called after reload is completed when we are
+ splitting insns. The temporary BASE_TMP might be set multiple times with
+ this code. */
rtx
rs6000_adjust_vec_address (rtx scalar_reg,
{
unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
rtx addr = XEXP (mem, 0);
- rtx element_offset;
rtx new_addr;
- bool valid_addr_p;
+
+ gcc_assert (!reg_mentioned_p (base_tmp, addr));
+ gcc_assert (!reg_mentioned_p (base_tmp, element));
/* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
/* Calculate what we need to add to the address to get the element
address. */
- if (CONST_INT_P (element))
- element_offset = GEN_INT (INTVAL (element) * scalar_size);
- else
- {
- int byte_shift = exact_log2 (scalar_size);
- gcc_assert (byte_shift >= 0);
-
- if (byte_shift == 0)
- element_offset = element;
-
- else
- {
- if (TARGET_POWERPC64)
- emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
- else
- emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
-
- element_offset = base_tmp;
- }
- }
+ rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
/* Create the new address pointing to the element within the vector. If we
are adding 0, we don't have to change the address. */
else if (REG_P (addr) || SUBREG_P (addr))
new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
+ /* For references to local static variables, fold a constant offset into the
+ address. */
+ else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
+ new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
+
/* Optimize D-FORM addresses with constant offset with a constant element, to
include the element offset in the address directly. */
else if (GET_CODE (addr) == PLUS)
{
rtx op0 = XEXP (addr, 0);
rtx op1 = XEXP (addr, 1);
- rtx insn;
gcc_assert (REG_P (op0) || SUBREG_P (op0));
if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
{
+ /* op0 should never be r0, because r0+offset is not valid. But it
+ doesn't hurt to make sure it is not r0. */
+ gcc_assert (reg_or_subregno (op0) != 0);
+
+ /* D-FORM address with constant element number. */
HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
rtx offset_rtx = GEN_INT (offset);
-
- if (IN_RANGE (offset, -32768, 32767)
- && (scalar_size < 8 || (offset & 0x3) == 0))
- new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
- else
- {
- emit_move_insn (base_tmp, offset_rtx);
- new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
- }
+ new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
}
else
{
- bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
- bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
-
- /* Note, ADDI requires the register being added to be a base
- register. If the register was R0, load it up into the temporary
- and do the add. */
- if (op1_reg_p
- && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
- {
- insn = gen_add3_insn (base_tmp, op1, element_offset);
- gcc_assert (insn != NULL_RTX);
- emit_insn (insn);
- }
-
- else if (ele_reg_p
- && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
- {
- insn = gen_add3_insn (base_tmp, element_offset, op1);
- gcc_assert (insn != NULL_RTX);
- emit_insn (insn);
- }
-
- else
- {
- emit_move_insn (base_tmp, op1);
- emit_insn (gen_add2_insn (base_tmp, element_offset));
- }
+ /* If we don't have a D-FORM address with a constant element number,
+ add the two elements in the current address. Then add the offset.
- new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
+ Previously, we tried to add the offset to OP1 and change the
+ address to an X-FORM format adding OP0 and BASE_TMP, but it became
+ complicated because we had to verify that op1 was not GPR0 and we
+ had a constant element offset (due to the way ADDI is defined).
+ By doing the add of OP0 and OP1 first, and then adding in the
+ offset, it has the benefit that if D-FORM instructions are
+ allowed, the offset is part of the memory access to the vector
+ element. */
+ emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
+ new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
}
new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
}
- /* If we have a PLUS, we need to see whether the particular register class
- allows for D-FORM or X-FORM addressing. */
- if (GET_CODE (new_addr) == PLUS)
- {
- rtx op1 = XEXP (new_addr, 1);
- addr_mask_type addr_mask;
- unsigned int scalar_regno = reg_or_subregno (scalar_reg);
-
- gcc_assert (HARD_REGISTER_NUM_P (scalar_regno));
- if (INT_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
-
- else if (FP_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
-
- else if (ALTIVEC_REGNO_P (scalar_regno))
- addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
-
- else
- gcc_unreachable ();
-
- if (REG_P (op1) || SUBREG_P (op1))
- valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
- else
- valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
- }
+ /* If the address isn't valid, move the address into the temporary base
+ register. Some reasons it could not be valid include:
- else if (REG_P (new_addr) || SUBREG_P (new_addr))
- valid_addr_p = true;
+ The address offset overflowed the 16 or 34 bit offset size;
+ We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
+ We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
+ Only X_FORM loads can be done, and the address is D_FORM. */
- else
- valid_addr_p = false;
+ enum insn_form iform
+ = address_to_insn_form (new_addr, scalar_mode,
+ reg_to_non_prefixed (scalar_reg, scalar_mode));
- if (!valid_addr_p)
+ if (iform == INSN_FORM_BAD)
{
emit_move_insn (base_tmp, new_addr);
new_addr = base_tmp;
systems. */
if (MEM_P (src))
{
- int num_elements = GET_MODE_NUNITS (mode);
- rtx num_ele_m1 = GEN_INT (num_elements - 1);
-
- emit_insn (gen_anddi3 (element, element, num_ele_m1));
- gcc_assert (REG_P (tmp_gpr));
- emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
- tmp_gpr, scalar_mode));
+ emit_move_insn (dest,
+ rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
+ scalar_mode));
return;
}
tree field = TYPE_FIELDS (type);
/* Skip all non field decls */
- while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ while (field != NULL
+ && (TREE_CODE (field) != FIELD_DECL
+ || DECL_FIELD_ABI_IGNORED (field)))
field = DECL_CHAIN (field);
if (field != NULL && field != type)
do {
tree field = TYPE_FIELDS (type);
/* Skip all non field decls */
- while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ while (field != NULL
+ && (TREE_CODE (field) != FIELD_DECL
+ || DECL_FIELD_ABI_IGNORED (field)))
field = DECL_CHAIN (field);
if (! field)
break;
if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
return false;
+ /* Is this a valid prefixed address? If the bottom four bits of the offset
+ are non-zero, we could use a prefixed instruction (which does not have the
+ DQ-form constraint that the traditional instruction had) instead of
+ forcing the unaligned offset to a GPR. */
+ if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
+ return true;
+
if (GET_CODE (addr) != PLUS)
return false;
return NULL_RTX;
}
+/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
+   the mode.  If we can't find (or don't know) the alignment of the symbol
+   we assume (optimistically) that it's sufficiently aligned [??? maybe we
+   should be pessimistic].  Offsets are validated in the same way as for
+   reg + offset.  */
+static bool
+darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
+{
+  /* We should not get here with this.  */
+  gcc_checking_assert (! mode_supports_dq_form (mode));
+
+  /* Strip an outer CONST wrapper, if any.  */
+  if (GET_CODE (x) == CONST)
+    x = XEXP (x, 0);
+
+  /* Look through a Mach-O PIC offset unspec to the wrapped expression.  */
+  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
+    x = XVECEXP (x, 0, 0);
+
+  rtx sym = NULL_RTX;
+  unsigned HOST_WIDE_INT offset = 0;
+
+  /* Decompose X into a symbol (if any) plus a constant offset.  */
+  if (GET_CODE (x) == PLUS)
+    {
+      sym = XEXP (x, 0);
+      if (! SYMBOL_REF_P (sym))
+	return false;
+      if (!CONST_INT_P (XEXP (x, 1)))
+	return false;
+      offset = INTVAL (XEXP (x, 1));
+    }
+  else if (SYMBOL_REF_P (x))
+    sym = x;
+  else if (CONST_INT_P (x))
+    offset = INTVAL (x);
+  else if (GET_CODE (x) == LABEL_REF)
+    offset = 0; // We assume code labels are Pmode aligned
+  else
+    return false; // not sure what we have here.
+
+  /* If we don't know the alignment of the thing to which the symbol refers,
+     we assume optimistically it is "enough".
+     ??? maybe we should be pessimistic instead.  */
+  unsigned align = 0;
+
+  if (sym)
+    {
+      tree decl = SYMBOL_REF_DECL (sym);
+#if TARGET_MACHO
+      if (MACHO_SYMBOL_INDIRECTION_P (sym))
+      /* The decl in an indirection symbol is the original one, which might
+	 be less aligned than the indirection.  Our indirections are always
+	 pointer-aligned.  */
+	;
+      else
+#endif
+	if (decl && DECL_ALIGN (decl))
+	  align = DECL_ALIGN_UNIT (decl);
+    }
+
+  /* EXTRA appears to be the number of additional bytes a multi-register
+     access to this mode touches past the base offset; the whole span must
+     still fit the 16-bit range (confirm against the definition of
+     SIGNED_16BIT_OFFSET_EXTRA_P).  */
+  unsigned int extra = 0;
+  switch (mode)
+    {
+    case E_DFmode:
+    case E_DDmode:
+    case E_DImode:
+      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
+	 addressing.  */
+      if (VECTOR_MEM_VSX_P (mode))
+	return false;
+
+      if (!TARGET_POWERPC64)
+	extra = 4;
+      else if ((offset & 3) || (align & 3))
+	return false;
+      break;
+
+    case E_TFmode:
+    case E_IFmode:
+    case E_KFmode:
+    case E_TDmode:
+    case E_TImode:
+    case E_PTImode:
+      extra = 8;
+      if (!TARGET_POWERPC64)
+	extra = 12;
+      else if ((offset & 3) || (align & 3))
+	return false;
+      break;
+
+    default:
+      break;
+    }
+
+  /* We only care if the access(es) would cause a change to the high part.  */
+  offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
+  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
+}
+
/* Return true if the MEM operand is a memory operand suitable for use
with a (full width, possibly multiple) gpr load/store. On
powerpc64 this means the offset must be divisible by 4.
&& legitimate_indirect_address_p (XEXP (addr, 0), false))
return true;
- /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
+ /* Allow prefixed instructions if supported. If the bottom two bits of the
+ offset are non-zero, we could use a prefixed instruction (which does not
+ have the DS-form constraint that the traditional instruction had) instead
+ of forcing the unaligned offset to a GPR. */
+ if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
+ return true;
+
+ /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
+ really OK. Doing this early avoids teaching all the other machinery
+ about them. */
+ if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
+ return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
+
+ /* Only allow offsettable addresses. See PRs 83969 and 84279. */
if (!rs6000_offsettable_memref_p (op, mode, false))
return false;
causes a wrap, so test only the low 16 bits. */
offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
- return offset + 0x8000 < 0x10000u - extra;
+ return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
int extra;
rtx addr = XEXP (op, 0);
+ /* Allow prefixed instructions if supported. If the bottom two bits of the
+ offset are non-zero, we could use a prefixed instruction (which does not
+ have the DS-form constraint that the traditional instruction had) instead
+ of forcing the unaligned offset to a GPR. */
+ if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
+ return true;
+
if (!offsettable_address_p (false, mode, addr))
return false;
causes a wrap, so test only the low 16 bits. */
offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
- return offset + 0x8000 < 0x10000u - extra;
+ return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
\f
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
break;
}
- offset += 0x8000;
- return offset < 0x10000 - extra;
+ if (TARGET_PREFIXED)
+ return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
+ else
+ return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
bool
if (TARGET_ELF)
emit_insn (gen_elf_high (reg, x));
else
- emit_insn (gen_macho_high (reg, x));
+ emit_insn (gen_macho_high (Pmode, reg, x));
return gen_rtx_LO_SUM (Pmode, reg, x);
}
else if (TARGET_TOC
return dest;
}
-/* Output arg setup instructions for a !TARGET_TLS_MARKERS
- __tls_get_addr call. */
-
-void
-rs6000_output_tlsargs (rtx *operands)
-{
- /* Set up operands for output_asm_insn, without modifying OPERANDS. */
- rtx op[3];
-
- /* The set dest of the call, ie. r3, which is also the first arg reg. */
- op[0] = operands[0];
- /* The TLS symbol from global_tlsarg stashed as CALL operand 2. */
- op[1] = XVECEXP (operands[2], 0, 0);
- if (XINT (operands[2], 1) == UNSPEC_TLSGD)
- {
- /* The GOT register. */
- op[2] = XVECEXP (operands[2], 0, 1);
- if (TARGET_CMODEL != CMODEL_SMALL)
- output_asm_insn ("addis %0,%2,%1@got@tlsgd@ha\n\t"
- "addi %0,%0,%1@got@tlsgd@l", op);
- else
- output_asm_insn ("addi %0,%2,%1@got@tlsgd", op);
- }
- else if (XINT (operands[2], 1) == UNSPEC_TLSLD)
- {
- if (TARGET_CMODEL != CMODEL_SMALL)
- output_asm_insn ("addis %0,%1,%&@got@tlsld@ha\n\t"
- "addi %0,%0,%&@got@tlsld@l", op);
- else
- output_asm_insn ("addi %0,%1,%&@got@tlsld", op);
- }
- else
- gcc_unreachable ();
-}
-
-/* Passes the tls arg value for global dynamic and local dynamic
- emit_library_call_value in rs6000_legitimize_tls_address to
- rs6000_call_aix and rs6000_call_sysv. This is used to emit the
- marker relocs put on __tls_get_addr calls. */
-static rtx global_tlsarg;
+/* Passes the tls arg value for global dynamic and local dynamic
+ emit_library_call_value in rs6000_legitimize_tls_address to
+ rs6000_call_aix and rs6000_call_sysv. This is used to emit the
+ marker relocs put on __tls_get_addr calls. */
+static rtx global_tlsarg;
/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
this (thread-local) address. */
return rs6000_legitimize_tls_address_aix (addr, model);
dest = gen_reg_rtx (Pmode);
- if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
+ if (model == TLS_MODEL_LOCAL_EXEC
+ && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)))
{
rtx tlsreg;
them in the .got section. So use a pointer to the .got section,
not one to secondary TOC sections used by 64-bit -mminimal-toc,
or to secondary GOT sections used by 32-bit -fPIC. */
- if (TARGET_64BIT)
+ if (rs6000_pcrel_p (cfun))
+ got = const0_rtx;
+ else if (TARGET_64BIT)
got = gen_rtx_REG (Pmode, 2);
else
{
rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
UNSPEC_TLSGD);
tga = rs6000_tls_get_addr ();
+ rtx argreg = gen_rtx_REG (Pmode, 3);
+ emit_insn (gen_rtx_SET (argreg, arg));
global_tlsarg = arg;
- if (TARGET_TLS_MARKERS)
- {
- rtx argreg = gen_rtx_REG (Pmode, 3);
- emit_insn (gen_rtx_SET (argreg, arg));
- emit_library_call_value (tga, dest, LCT_CONST, Pmode,
- argreg, Pmode);
- }
- else
- emit_library_call_value (tga, dest, LCT_CONST, Pmode);
+ emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
global_tlsarg = NULL_RTX;
/* Make a note so that the result of this call can be CSEd. */
rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
tga = rs6000_tls_get_addr ();
tmp1 = gen_reg_rtx (Pmode);
+ rtx argreg = gen_rtx_REG (Pmode, 3);
+ emit_insn (gen_rtx_SET (argreg, arg));
global_tlsarg = arg;
- if (TARGET_TLS_MARKERS)
- {
- rtx argreg = gen_rtx_REG (Pmode, 3);
- emit_insn (gen_rtx_SET (argreg, arg));
- emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
- argreg, Pmode);
- }
- else
- emit_library_call_value (tga, tmp1, LCT_CONST, Pmode);
+ emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
global_tlsarg = NULL_RTX;
/* Make a note so that the result of this call can be CSEd. */
rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
- if (rs6000_tls_size == 16)
+ if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))
{
if (TARGET_64BIT)
insn = gen_tls_dtprel_64 (dest, tmp1, addr);
else
insn = gen_tls_got_tprel_32 (tmp2, got, addr);
emit_insn (insn);
- if (TARGET_64BIT)
+ if (rs6000_pcrel_p (cfun))
+ {
+ if (TARGET_64BIT)
+ insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
+ else
+ insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
+ }
+ else if (TARGET_64BIT)
insn = gen_tls_tls_64 (dest, tmp2, addr);
else
insn = gen_tls_tls_32 (dest, tmp2, addr);
bool quad_offset_p = mode_supports_dq_form (mode);
/* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
- if (VECTOR_MEM_ALTIVEC_P (mode)
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -16)
&& mode_supports_pre_incdec_p (mode)
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
return 1;
+
+ /* Handle prefixed addresses (PC-relative or 34-bit offset). */
+ if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
+ return 1;
+
/* Handle restricted vector d-form offsets in ISA 3.0. */
if (quad_offset_p)
{
|| (!avoiding_indexed_address_p (mode)
&& legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
&& rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
- return 1;
+ {
+ /* There is no prefixed version of the load/store with update. */
+ rtx addr = XEXP (x, 1);
+ return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
+ }
if (reg_offset_p && !quad_offset_p
&& legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
return 1;
&& XEXP (addr, 0) != arg_pointer_rtx
&& CONST_INT_P (XEXP (addr, 1)))
{
- unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
- return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
+ HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
+ HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
+ if (TARGET_PREFIXED)
+ return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
+ else
+ return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
}
break;
gen_lowpart (SImode,
copy_rtx (temp))));
}
+ else if (ud1 == ud3 && ud2 == ud4)
+ {
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+ HOST_WIDE_INT num = (ud2 << 16) | ud1;
+ rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000);
+ rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
+ rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
+ emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
+ }
else if ((ud4 == 0xffff && (ud3 & 0x8000))
|| (ud4 == 0 && ! (ud3 & 0x8000)))
{
return;
}
+ /* Use the default pattern for loading up PC-relative addresses. */
+ if (TARGET_PCREL && mode == Pmode
+ && pcrel_local_or_external_address (operands[1], Pmode))
+ {
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ return;
+ }
+
if (DEFAULT_ABI == ABI_V4
&& mode == Pmode && mode == SImode
&& flag_pic == 1 && got_operand (operands[1], mode))
if (DEFAULT_ABI == ABI_DARWIN)
{
#if TARGET_MACHO
+ /* This is not PIC code, but could require the subset of
+ indirections used by mdynamic-no-pic. */
if (MACHO_DYNAMIC_NO_PIC_P)
{
/* Take care of any required data indirection. */
return;
}
#endif
- emit_insn (gen_macho_high (target, operands[1]));
- emit_insn (gen_macho_low (operands[0], target, operands[1]));
+ emit_insn (gen_macho_high (Pmode, target, operands[1]));
+ emit_insn (gen_macho_low (Pmode, operands[0],
+ target, operands[1]));
return;
}
&& code != UNGT && code != UNLT
&& code != UNGE && code != UNLE));
- /* These should never be generated except for
- flag_finite_math_only. */
- gcc_assert (mode != CCFPmode
- || flag_finite_math_only
- || (code != LE && code != GE
- && code != UNEQ && code != LTGT
- && code != UNGT && code != UNLT));
-
/* These are invalid; the information is not there. */
gcc_assert (mode != CCEQmode || code == EQ || code == NE);
}
if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
return false;
+ /* Allow SD<->DD changes, since SDmode values are stored in
+ the low half of the DDmode, just like target-independent
+ code expects. We need to allow at least SD->DD since
+ rs6000_secondary_memory_needed_mode asks for that change
+ to be made for SD reloads. */
+ if ((to == DDmode && from == SDmode)
+ || (to == SDmode && from == DDmode))
+ return true;
+
if (from_size < 8 || to_size < 8)
return false;
if (REG_P (x))
fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
- /* Is it a pc-relative address? */
- else if (pcrel_address (x, Pmode))
+ /* Is it a PC-relative address? */
+ else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
{
HOST_WIDE_INT offset;
if (offset)
fprintf (file, "%+" PRId64, offset);
- fputs ("@pcrel", file);
+ if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
+ fprintf (file, "@got");
+
+ fprintf (file, "@pcrel");
}
else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
|| GET_CODE (x) == LABEL_REF)
char arg[12];
arg[0] = 0;
- if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
+ if (GET_CODE (operands[funop + 1]) == UNSPEC)
{
if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
sprintf (arg, "(%%%u@tlsgd)", funop + 1);
else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
sprintf (arg, "(%%&@tlsld)");
- else
- gcc_unreachable ();
}
/* The magic 32768 offset here corresponds to the offset of
if (DEFAULT_ABI == ABI_AIX)
s += sprintf (s,
"l%s 2,%%%u\n\t",
- ptrload, funop + 2);
+ ptrload, funop + 3);
/* We don't need the extra code to stop indirect call speculation if
calling via LR. */
const char *rel64 = TARGET_64BIT ? "64" : "";
char tls[29];
tls[0] = 0;
- if (TARGET_TLS_MARKERS && GET_CODE (operands[funop + 1]) == UNSPEC)
+ if (GET_CODE (operands[funop + 1]) == UNSPEC)
{
if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
rel64);
- else
- gcc_unreachable ();
}
const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : "";
sprintf (s,
"b%%T%ul\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 3);
+ funop, ptrload, funop + 4);
else
sprintf (s,
"beq%%T%ul-\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 3);
+ funop, ptrload, funop + 4);
}
else if (DEFAULT_ABI == ABI_ELFv2)
{
sprintf (s,
"b%%T%ul\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 2);
+ funop, ptrload, funop + 3);
else
sprintf (s,
"beq%%T%ul-\n\t"
"l%s 2,%%%u(1)",
- funop, ptrload, funop + 2);
+ funop, ptrload, funop + 3);
}
else
{
const char *rel64 = TARGET_64BIT ? "64" : "";
char tls[30];
tls[0] = 0;
- if (TARGET_TLS_MARKERS && GET_CODE (operands[3]) == UNSPEC)
+ if (GET_CODE (operands[3]) == UNSPEC)
{
char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
if (XINT (operands[3], 1) == UNSPEC_TLSGD)
else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
off, rel64);
- else
- gcc_unreachable ();
}
gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
return str;
}
#endif
-
-/* Helper function to return whether a MODE can do prefixed loads/stores.
- VOIDmode is used when we are loading the pc-relative address into a base
- register, but we are not using it as part of a memory operation. As modes
- add support for prefixed memory, they will be added here. */
-
-static bool
-mode_supports_prefixed_address_p (machine_mode mode)
-{
- return mode == VOIDmode;
-}
-
-/* Function to return true if ADDR is a valid prefixed memory address that uses
- mode MODE. */
-
-bool
-rs6000_prefixed_address_mode_p (rtx addr, machine_mode mode)
-{
- if (!TARGET_PREFIXED_ADDR || !mode_supports_prefixed_address_p (mode))
- return false;
-
- /* Check for PC-relative addresses. */
- if (pcrel_address (addr, Pmode))
- return true;
-
- /* Check for prefixed memory addresses that have a large numeric offset,
- or an offset that can't be used for a DS/DQ-form memory operation. */
- if (GET_CODE (addr) == PLUS)
- {
- rtx op0 = XEXP (addr, 0);
- rtx op1 = XEXP (addr, 1);
-
- if (!base_reg_operand (op0, Pmode) || !CONST_INT_P (op1))
- return false;
-
- HOST_WIDE_INT value = INTVAL (op1);
- if (!SIGNED_34BIT_OFFSET_P (value))
- return false;
-
- /* Offset larger than 16-bits? */
- if (!SIGNED_16BIT_OFFSET_P (value))
- return true;
-
- /* DQ instruction (bottom 4 bits must be 0) for vectors. */
- HOST_WIDE_INT mask;
- if (GET_MODE_SIZE (mode) >= 16)
- mask = 15;
-
- /* DS instruction (bottom 2 bits must be 0). For 32-bit integers, we
- need to use DS instructions if we are sign-extending the value with
- LWA. For 32-bit floating point, we need DS instructions to load and
- store values to the traditional Altivec registers. */
- else if (GET_MODE_SIZE (mode) >= 4)
- mask = 3;
-
- /* QImode/HImode has no restrictions. */
- else
- return true;
-
- /* Return true if we must use a prefixed instruction. */
- return (value & mask) != 0;
- }
-
- return false;
-}
\f
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
gen_rtx_COMPARE (comp_mode, op0, op1)));
}
- /* Some kinds of FP comparisons need an OR operation;
- under flag_finite_math_only we don't bother. */
- if (FLOAT_MODE_P (mode)
- && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
- && !flag_finite_math_only
- && (code == LE || code == GE
- || code == UNEQ || code == LTGT
- || code == UNGT || code == UNLT))
- {
- enum rtx_code or1, or2;
- rtx or1_rtx, or2_rtx, compare2_rtx;
- rtx or_result = gen_reg_rtx (CCEQmode);
-
- switch (code)
- {
- case LE: or1 = LT; or2 = EQ; break;
- case GE: or1 = GT; or2 = EQ; break;
- case UNEQ: or1 = UNORDERED; or2 = EQ; break;
- case LTGT: or1 = LT; or2 = GT; break;
- case UNGT: or1 = UNORDERED; or2 = GT; break;
- case UNLT: or1 = UNORDERED; or2 = LT; break;
- default: gcc_unreachable ();
- }
- validate_condition_mode (or1, comp_mode);
- validate_condition_mode (or2, comp_mode);
- or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
- or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
- compare2_rtx = gen_rtx_COMPARE (CCEQmode,
- gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
- const_true_rtx);
- emit_insn (gen_rtx_SET (or_result, compare2_rtx));
-
- compare_result = or_result;
- code = EQ;
- }
-
validate_condition_mode (code, GET_MODE (compare_result));
return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
return scratch;
}
+/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
+   requires this.  The result is mode MODE.  */
+rtx
+rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
+{
+  rtx cond[2];
+  int n = 0;
+  /* Each of the six CODEs handled here is the disjunction of exactly two of
+     the four primitive CR conditions LT, GT, EQ and UNORDERED on X; collect
+     those two sub-conditions (each CODE appears in exactly two tests).  */
+  if (code == LTGT || code == LE || code == UNLT)
+    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
+  if (code == LTGT || code == GE || code == UNGT)
+    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
+  if (code == LE || code == GE || code == UNEQ)
+    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
+  if (code == UNLT || code == UNGT || code == UNEQ)
+    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
+
+  /* Any other CODE should not have been routed here.  */
+  gcc_assert (n == 2);
+
+  /* Emit CC = cond[0] | cond[1] as one compare in CCEQmode, so callers can
+     test the result with a plain EQ/NE against zero.  */
+  rtx cc = gen_reg_rtx (CCEQmode);
+  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
+  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
+
+  return cc;
+}
+
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
- rtx condition_rtx;
- machine_mode op_mode;
- enum rtx_code cond_code;
- rtx result = operands[0];
-
- condition_rtx = rs6000_generate_compare (operands[1], mode);
- cond_code = GET_CODE (condition_rtx);
+ rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
+ rtx_code cond_code = GET_CODE (condition_rtx);
- if (cond_code == NE
- || cond_code == GE || cond_code == LE
- || cond_code == GEU || cond_code == LEU
- || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
+ if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
+ && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
+ ;
+ else if (cond_code == NE
+ || cond_code == GE || cond_code == LE
+ || cond_code == GEU || cond_code == LEU
+ || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
{
rtx not_result = gen_reg_rtx (CCEQmode);
rtx not_op, rev_cond_rtx;
condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
}
- op_mode = GET_MODE (XEXP (operands[1], 0));
+ machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
if (op_mode == VOIDmode)
op_mode = GET_MODE (XEXP (operands[1], 1));
if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
{
PUT_MODE (condition_rtx, DImode);
- convert_move (result, condition_rtx, 0);
+ convert_move (operands[0], condition_rtx, 0);
}
else
{
PUT_MODE (condition_rtx, SImode);
- emit_insn (gen_rtx_SET (result, condition_rtx));
+ emit_insn (gen_rtx_SET (operands[0], condition_rtx));
}
}
void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
- rtx condition_rtx, loc_ref;
-
- condition_rtx = rs6000_generate_compare (operands[0], mode);
- loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
- emit_jump_insn (gen_rtx_SET (pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
- loc_ref, pc_rtx)));
+ rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
+ rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
;
- else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
+ /* Only when NaNs and signed-zeros are not in effect, smax could be
+ used for `op0 < op1 ? op1 : op0`, and smin could be used for
+ `op0 > op1 ? op1 : op0`. */
+ else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
+ && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
max_p = !max_p;
else
/* At this point we know we can use fsel. */
+ /* Don't allow compare_mode other than SFmode or DFmode, for others there
+ is no fsel instruction. */
+ if (compare_mode != SFmode && compare_mode != DFmode)
+ return 0;
+
/* Reduce the comparison to a comparison against zero. */
if (! is_against_zero)
{
return insn;
}
+/* Move instruction at POS to the end of the READY list, i.e. to index
+   LASTPOS, shifting the insns in between down by one slot.  The scheduler
+   issues from the end of the ready list, so the moved insn becomes the
+   next candidate to issue.  */
+
+static void
+move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
+{
+  rtx_insn *tmp;
+  int i;
+
+  tmp = ready[pos];
+  for (i = pos; i < lastpos; i++)
+    ready[i] = ready[i + 1];
+  ready[lastpos] = tmp;
+}
+
+/* Do Power6 specific sched_reorder2 reordering of ready list. */
+
+static int
+power6_sched_reorder2 (rtx_insn **ready, int lastpos)
+{
+  /* For Power6, we need to handle some special cases to try and keep the
+     store queue from overflowing and triggering expensive flushes.
+
+     This code monitors how load and store instructions are being issued
+     and skews the ready list one way or the other to increase the likelihood
+     that a desired instruction is issued at the proper time.
+
+     A couple of things are done.  First, we maintain a "load_store_pendulum"
+     to track the current state of load/store issue.
+
+       - If the pendulum is at zero, then no loads or stores have been
+         issued in the current cycle so we do nothing.
+
+       - If the pendulum is 1, then a single load has been issued in this
+         cycle and we attempt to locate another load in the ready list to
+         issue with it.
+
+       - If the pendulum is -2, then two stores have already been
+         issued in this cycle, so we increase the priority of the first load
+         in the ready list to increase its likelihood of being chosen first
+         in the next cycle.
+
+       - If the pendulum is -1, then a single store has been issued in this
+         cycle and we attempt to locate another store in the ready list to
+         issue with it, preferring a store to an adjacent memory location to
+         facilitate store pairing in the store queue.
+
+       - If the pendulum is 2, then two loads have already been
+         issued in this cycle, so we increase the priority of the first store
+         in the ready list to increase its likelihood of being chosen first
+         in the next cycle.
+
+       - If the pendulum < -2 or > 2, then do nothing.
+
+     Note: This code covers the most common scenarios.  There exist non
+           load/store instructions which make use of the LSU and which
+           would need to be accounted for to strictly model the behavior
+           of the machine.  Those instructions are currently unaccounted
+           for to help minimize compile time overhead of this code.
+   */
+  int pos;
+  rtx load_mem, str_mem;
+
+  if (is_store_insn (last_scheduled_insn, &str_mem))
+    /* Issuing a store, swing the load_store_pendulum to the left */
+    load_store_pendulum--;
+  else if (is_load_insn (last_scheduled_insn, &load_mem))
+    /* Issuing a load, swing the load_store_pendulum to the right */
+    load_store_pendulum++;
+  else
+    return cached_can_issue_more;
+
+  /* If the pendulum is balanced, or there is only one instruction on
+     the ready list, then all is well, so return. */
+  if ((load_store_pendulum == 0) || (lastpos <= 0))
+    return cached_can_issue_more;
+
+  if (load_store_pendulum == 1)
+    {
+      /* A load has been issued in this cycle.  Scan the ready list
+	 for another load to issue with it */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_load_insn (ready[pos], &load_mem))
+	    {
+	      /* Found a load.  Move it to the head of the ready list,
+		 and adjust its priority so that it is more likely to
+		 stay there */
+	      move_to_end_of_ready (ready, pos, lastpos);
+
+	      if (!sel_sched_p ()
+		  && INSN_PRIORITY_KNOWN (ready[lastpos]))
+		INSN_PRIORITY (ready[lastpos])++;
+	      break;
+	    }
+	  pos--;
+	}
+    }
+  else if (load_store_pendulum == -2)
+    {
+      /* Two stores have been issued in this cycle.  Increase the
+	 priority of the first load in the ready list to favor it for
+	 issuing in the next cycle. */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_load_insn (ready[pos], &load_mem)
+	      && !sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[pos]))
+	    {
+	      INSN_PRIORITY (ready[pos])++;
+
+	      /* Adjust the pendulum to account for the fact that a load
+		 was found and increased in priority.  This is to prevent
+		 increasing the priority of multiple loads */
+	      load_store_pendulum--;
+
+	      break;
+	    }
+	  pos--;
+	}
+    }
+  else if (load_store_pendulum == -1)
+    {
+      /* A store has been issued in this cycle.  Scan the ready list for
+	 another store to issue with it, preferring a store to an adjacent
+	 memory location */
+      int first_store_pos = -1;
+
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_store_insn (ready[pos], &str_mem))
+	    {
+	      rtx str_mem2;
+	      /* Maintain the index of the first store found on the
+		 list */
+	      if (first_store_pos == -1)
+		first_store_pos = pos;
+
+	      if (is_store_insn (last_scheduled_insn, &str_mem2)
+		  && adjacent_mem_locations (str_mem, str_mem2))
+		{
+		  /* Found an adjacent store.  Move it to the head of the
+		     ready list, and adjust its priority so that it is
+		     more likely to stay there */
+		  move_to_end_of_ready (ready, pos, lastpos);
+
+		  if (!sel_sched_p ()
+		      && INSN_PRIORITY_KNOWN (ready[lastpos]))
+		    INSN_PRIORITY (ready[lastpos])++;
+
+		  first_store_pos = -1;
+
+		  break;
+		};
+	    }
+	  pos--;
+	}
+
+      if (first_store_pos >= 0)
+	{
+	  /* An adjacent store wasn't found, but a non-adjacent store was,
+	     so move the non-adjacent store to the front of the ready
+	     list, and adjust its priority so that it is more likely to
+	     stay there. */
+	  move_to_end_of_ready (ready, first_store_pos, lastpos);
+	  if (!sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[lastpos]))
+	    INSN_PRIORITY (ready[lastpos])++;
+	}
+    }
+  else if (load_store_pendulum == 2)
+    {
+      /* Two loads have been issued in this cycle.  Increase the priority
+	 of the first store in the ready list to favor it for issuing in
+	 the next cycle. */
+      pos = lastpos;
+
+      while (pos >= 0)
+	{
+	  if (is_store_insn (ready[pos], &str_mem)
+	      && !sel_sched_p ()
+	      && INSN_PRIORITY_KNOWN (ready[pos]))
+	    {
+	      INSN_PRIORITY (ready[pos])++;
+
+	      /* Adjust the pendulum to account for the fact that a store
+		 was found and increased in priority.  This is to prevent
+		 increasing the priority of multiple stores */
+	      load_store_pendulum++;
+
+	      break;
+	    }
+	  pos--;
+	}
+    }
+
+  return cached_can_issue_more;
+}
+
/* Do Power9 specific sched_reorder2 reordering of ready list. */
static int
power9_sched_reorder2 (rtx_insn **ready, int lastpos)
{
int pos;
- int i;
- rtx_insn *tmp;
enum attr_type type, type2;
type = get_attr_type (last_scheduled_insn);
if (recog_memoized (ready[pos]) >= 0
&& get_attr_type (ready[pos]) == TYPE_DIV)
{
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
break;
}
pos--;
{
/* Found a vector insn to pair with, move it to the
end of the ready list so it is scheduled next. */
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Didn't find a vector to pair with but did find a vecload,
move it to the end of the ready list. */
- tmp = ready[vecload_pos];
- for (i = vecload_pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, vecload_pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Found a vecload insn to pair with, move it to the
end of the ready list so it is scheduled next. */
- tmp = ready[pos];
- for (i = pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
{
/* Didn't find a vecload to pair with but did find a vector
insn, move it to the end of the ready list. */
- tmp = ready[vec_pos];
- for (i = vec_pos; i < lastpos; i++)
- ready[i] = ready[i + 1];
- ready[lastpos] = tmp;
+ move_to_end_of_ready (ready, vec_pos, lastpos);
vec_pairing = 1;
return cached_can_issue_more;
}
if (sched_verbose)
fprintf (dump, "// rs6000_sched_reorder2 :\n");
- /* For Power6, we need to handle some special cases to try and keep the
- store queue from overflowing and triggering expensive flushes.
+ /* Do Power6 dependent reordering if necessary. */
+ if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
+ return power6_sched_reorder2 (ready, *pn_ready - 1);
- This code monitors how load and store instructions are being issued
- and skews the ready list one way or the other to increase the likelihood
- that a desired instruction is issued at the proper time.
+ /* Do Power9 dependent reordering if necessary. */
+ if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
+ && recog_memoized (last_scheduled_insn) >= 0)
+ return power9_sched_reorder2 (ready, *pn_ready - 1);
- A couple of things are done. First, we maintain a "load_store_pendulum"
- to track the current state of load/store issue.
+ return cached_can_issue_more;
+}
- - If the pendulum is at zero, then no loads or stores have been
- issued in the current cycle so we do nothing.
+/* Return whether the presence of INSN causes a dispatch group termination
+ of group WHICH_GROUP.
- - If the pendulum is 1, then a single load has been issued in this
- cycle and we attempt to locate another load in the ready list to
- issue with it.
+ If WHICH_GROUP == current_group, this function will return true if INSN
+ causes the termination of the current group (i.e, the dispatch group to
+ which INSN belongs). This means that INSN will be the last insn in the
+ group it belongs to.
- - If the pendulum is -2, then two stores have already been
- issued in this cycle, so we increase the priority of the first load
- in the ready list to increase it's likelihood of being chosen first
- in the next cycle.
+ If WHICH_GROUP == previous_group, this function will return true if INSN
+ causes the termination of the previous group (i.e, the dispatch group that
+ precedes the group to which INSN belongs). This means that INSN will be
+ the first insn in the group it belongs to). */
- - If the pendulum is -1, then a single store has been issued in this
- cycle and we attempt to locate another store in the ready list to
- issue with it, preferring a store to an adjacent memory location to
- facilitate store pairing in the store queue.
+static bool
+insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
+{
+ bool first, last;
- - If the pendulum is 2, then two loads have already been
- issued in this cycle, so we increase the priority of the first store
- in the ready list to increase it's likelihood of being chosen first
- in the next cycle.
+ if (! insn)
+ return false;
- - If the pendulum < -2 or > 2, then do nothing.
-
- Note: This code covers the most common scenarios. There exist non
- load/store instructions which make use of the LSU and which
- would need to be accounted for to strictly model the behavior
- of the machine. Those instructions are currently unaccounted
- for to help minimize compile time overhead of this code.
- */
- if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
- {
- int pos;
- int i;
- rtx_insn *tmp;
- rtx load_mem, str_mem;
-
- if (is_store_insn (last_scheduled_insn, &str_mem))
- /* Issuing a store, swing the load_store_pendulum to the left */
- load_store_pendulum--;
- else if (is_load_insn (last_scheduled_insn, &load_mem))
- /* Issuing a load, swing the load_store_pendulum to the right */
- load_store_pendulum++;
- else
- return cached_can_issue_more;
-
- /* If the pendulum is balanced, or there is only one instruction on
- the ready list, then all is well, so return. */
- if ((load_store_pendulum == 0) || (*pn_ready <= 1))
- return cached_can_issue_more;
-
- if (load_store_pendulum == 1)
- {
- /* A load has been issued in this cycle. Scan the ready list
- for another load to issue with it */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_load_insn (ready[pos], &load_mem))
- {
- /* Found a load. Move it to the head of the ready list,
- and adjust it's priority so that it is more likely to
- stay there */
- tmp = ready[pos];
- for (i=pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
-
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
- break;
- }
- pos--;
- }
- }
- else if (load_store_pendulum == -2)
- {
- /* Two stores have been issued in this cycle. Increase the
- priority of the first load in the ready list to favor it for
- issuing in the next cycle. */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_load_insn (ready[pos], &load_mem)
- && !sel_sched_p ()
- && INSN_PRIORITY_KNOWN (ready[pos]))
- {
- INSN_PRIORITY (ready[pos])++;
-
- /* Adjust the pendulum to account for the fact that a load
- was found and increased in priority. This is to prevent
- increasing the priority of multiple loads */
- load_store_pendulum--;
-
- break;
- }
- pos--;
- }
- }
- else if (load_store_pendulum == -1)
- {
- /* A store has been issued in this cycle. Scan the ready list for
- another store to issue with it, preferring a store to an adjacent
- memory location */
- int first_store_pos = -1;
-
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_store_insn (ready[pos], &str_mem))
- {
- rtx str_mem2;
- /* Maintain the index of the first store found on the
- list */
- if (first_store_pos == -1)
- first_store_pos = pos;
-
- if (is_store_insn (last_scheduled_insn, &str_mem2)
- && adjacent_mem_locations (str_mem, str_mem2))
- {
- /* Found an adjacent store. Move it to the head of the
- ready list, and adjust it's priority so that it is
- more likely to stay there */
- tmp = ready[pos];
- for (i=pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
-
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
-
- first_store_pos = -1;
-
- break;
- };
- }
- pos--;
- }
-
- if (first_store_pos >= 0)
- {
- /* An adjacent store wasn't found, but a non-adjacent store was,
- so move the non-adjacent store to the front of the ready
- list, and adjust its priority so that it is more likely to
- stay there. */
- tmp = ready[first_store_pos];
- for (i=first_store_pos; i<*pn_ready-1; i++)
- ready[i] = ready[i + 1];
- ready[*pn_ready-1] = tmp;
- if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
- INSN_PRIORITY (tmp)++;
- }
- }
- else if (load_store_pendulum == 2)
- {
- /* Two loads have been issued in this cycle. Increase the priority
- of the first store in the ready list to favor it for issuing in
- the next cycle. */
- pos = *pn_ready-1;
-
- while (pos >= 0)
- {
- if (is_store_insn (ready[pos], &str_mem)
- && !sel_sched_p ()
- && INSN_PRIORITY_KNOWN (ready[pos]))
- {
- INSN_PRIORITY (ready[pos])++;
-
- /* Adjust the pendulum to account for the fact that a store
- was found and increased in priority. This is to prevent
- increasing the priority of multiple stores */
- load_store_pendulum++;
-
- break;
- }
- pos--;
- }
- }
- }
-
- /* Do Power9 dependent reordering if necessary. */
- if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
- && recog_memoized (last_scheduled_insn) >= 0)
- return power9_sched_reorder2 (ready, *pn_ready - 1);
-
- return cached_can_issue_more;
-}
-
-/* Return whether the presence of INSN causes a dispatch group termination
- of group WHICH_GROUP.
-
- If WHICH_GROUP == current_group, this function will return true if INSN
- causes the termination of the current group (i.e, the dispatch group to
- which INSN belongs). This means that INSN will be the last insn in the
- group it belongs to.
-
- If WHICH_GROUP == previous_group, this function will return true if INSN
- causes the termination of the previous group (i.e, the dispatch group that
- precedes the group to which INSN belongs). This means that INSN will be
- the first insn in the group it belongs to). */
-
-static bool
-insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
-{
- bool first, last;
-
- if (! insn)
- return false;
-
- first = insn_must_be_first_in_group (insn);
- last = insn_must_be_last_in_group (insn);
+ first = insn_must_be_first_in_group (insn);
+ last = insn_must_be_last_in_group (insn);
if (first && last)
return true;
if (rs6000_pcrel_p (cfun))
{
rtx reg = gen_rtx_REG (Pmode, regno);
- rtx u = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
- UNSPEC_PLT_PCREL);
+ rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (3, base, call_ref, arg),
+ UNSPECV_PLT_PCREL);
emit_insn (gen_rtx_SET (reg, u));
return reg;
}
rtx reg = gen_rtx_REG (Pmode, regno);
rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
UNSPEC_PLT16_HA);
- rtx lo = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, reg, call_ref, arg),
- UNSPEC_PLT16_LO);
+ rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
+ gen_rtvec (3, reg, call_ref, arg),
+ UNSPECV_PLT16_LO);
emit_insn (gen_rtx_SET (reg, hi));
emit_insn (gen_rtx_SET (reg, lo));
return reg;
/* Lose our funky encoding stuff so it doesn't contaminate the stub. */
symb = (*targetm.strip_name_encoding) (symb);
-
length = strlen (symb);
symbol_name = XALLOCAVEC (char, length + 32);
GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
lazy_ptr_name = XALLOCAVEC (char, length + 32);
GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
- if (flag_pic == 2)
- switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
- else
- switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
-
- if (flag_pic == 2)
+ if (MACHOPIC_PURE)
{
+ switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
fprintf (file, "\t.align 5\n");
fprintf (file, "%s:\n", stub);
sprintf (local_label_0, "L%u$spb", label);
fprintf (file, "\tmflr r0\n");
- if (TARGET_LINK_STACK)
- {
- char name[32];
- get_ppc476_thunk_name (name);
- fprintf (file, "\tbl %s\n", name);
- fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
- }
- else
- {
- fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
- fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
- }
+ fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
+ fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
lazy_ptr_name, local_label_0);
fprintf (file, "\tmtlr r0\n");
fprintf (file, "\tmtctr r12\n");
fprintf (file, "\tbctr\n");
}
- else
+ else /* mdynamic-no-pic or mkernel. */
{
+ switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
fprintf (file, "\t.align 4\n");
fprintf (file, "%s:\n", stub);
if (recog_memoized (insn) < 0)
return 0;
+ /* If we are optimizing for size, just use the length. */
if (!speed)
return get_attr_length (insn);
+ /* Use the cost if provided. */
int cost = get_attr_cost (insn);
if (cost > 0)
return cost;
- int n = get_attr_length (insn) / 4;
+ /* If the insn tells us how many insns there are, use that. Otherwise use
+ the length/4. Adjust the insn length to remove the extra size that
+ prefixed instructions take. */
+ int n = get_attr_num_insns (insn);
+ if (n == 0)
+ {
+ int length = get_attr_length (insn);
+ if (get_attr_prefixed (insn) == PREFIXED_YES)
+ {
+ int adjust = 0;
+ ADJUST_INSN_LENGTH (insn, adjust);
+ length -= adjust;
+ }
+
+ n = length / 4;
+ }
+
enum attr_type type = get_attr_type (insn);
switch (type)
{ "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
- { "prefixed-addr", OPTION_MASK_PREFIXED_ADDR, false, true },
+ { "prefixed", OPTION_MASK_PREFIXED, false, true },
{ "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
{ "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "align-branch-targets",
offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
- { "tls-markers",
- offsetof (struct gcc_options, x_tls_markers),
- offsetof (struct cl_target_option, x_tls_markers), },
{ "sched-prolog",
offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
if ((flags & mask) == 0)
{
no_str = "no-";
- len += sizeof ("no-") - 1;
+ len += strlen ("no-");
}
flags &= ~mask;
if ((flags & mask) != 0)
{
no_str = "no-";
- len += sizeof ("no-") - 1;
+ len += strlen ("no-");
}
flags |= mask;
fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
comma = ", ";
- comma_len = sizeof (", ") - 1;
+ comma_len = strlen (", ");
}
fputs ("\n", file);
{ OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
{ OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
{ OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
+ { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
};
for (i = 0; i < ARRAY_SIZE (flags); i++)
DECL_INITIAL (decl) = make_node (BLOCK);
DECL_STATIC_CONSTRUCTOR (decl) = 0;
+ if (DECL_COMDAT_GROUP (default_decl)
+ || TREE_PUBLIC (default_decl))
+ {
+ /* In this case, each translation unit with a call to this
+ versioned function will put out a resolver. Ensure it
+ is comdat to keep just one copy. */
+ DECL_COMDAT (decl) = 1;
+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+ }
+ else
+ TREE_PUBLIC (dispatch_decl) = 0;
+
/* Build result decl and add to function_decl. */
tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
DECL_CONTEXT (t) = decl;
tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
- /* If callee has no option attributes, then it is ok to inline. */
+ /* If the callee has no option attributes, then it is ok to inline. */
if (!callee_tree)
ret = true;
- /* If caller has no option attributes, but callee does then it is not ok to
- inline. */
- else if (!caller_tree)
- ret = false;
-
else
{
- struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
+ HOST_WIDE_INT caller_isa;
struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
+ HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
+ HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
- /* Callee's options should a subset of the caller's, i.e. a vsx function
- can inline an altivec function but a non-vsx function can't inline a
- vsx function. */
- if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
- == callee_opts->x_rs6000_isa_flags)
+ /* If the caller has option attributes, then use them.
+ Otherwise, use the command line options. */
+ if (caller_tree)
+ caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags;
+ else
+ caller_isa = rs6000_isa_flags;
+
+ /* The callee's options must be a subset of the caller's options, i.e.
+ a vsx function may inline an altivec function, but a no-vsx function
+ must not inline a vsx function. However, for those options that the
+ callee has explicitly enabled or disabled, then we must enforce that
+ the callee's and caller's options match exactly; see PR70010. */
+ if (((caller_isa & callee_isa) == callee_isa)
+ && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
ret = true;
}
rtx toc_restore = NULL_RTX;
rtx func_addr;
rtx abi_reg = NULL_RTX;
- rtx call[4];
+ rtx call[5];
int n_call;
rtx insn;
bool is_pltseq_longcall;
call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
if (value != NULL_RTX)
call[0] = gen_rtx_SET (value, call[0]);
- n_call = 1;
+ call[1] = gen_rtx_USE (VOIDmode, cookie);
+ n_call = 2;
if (toc_load)
call[n_call++] = toc_load;
return rs6000_fndecl_pcrel_p (fn->decl);
}
+\f
+/* Given an address (ADDR), a mode (MODE), and what the format of the
+ non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
+ for the address. Returns INSN_FORM_BAD if the address cannot be encoded by
+ any supported load/store instruction form. */
+
+enum insn_form
+address_to_insn_form (rtx addr,
+ machine_mode mode,
+ enum non_prefixed_form non_prefixed_format)
+{
+ /* Single register is easy. */
+ if (REG_P (addr) || SUBREG_P (addr))
+ return INSN_FORM_BASE_REG;
+
+ /* If the non prefixed instruction format doesn't support offset addressing,
+ make sure only indexed addressing is allowed.
+
+ We special case SDmode so that the register allocator does not try to move
+ SDmode through GPR registers, but instead uses the 32-bit integer load and
+ store instructions for the floating point registers. */
+ if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
+ {
+ if (GET_CODE (addr) != PLUS)
+ return INSN_FORM_BAD;
+
+ /* Indexed (X-form) addressing requires reg+reg. */
+ rtx op0 = XEXP (addr, 0);
+ rtx op1 = XEXP (addr, 1);
+ if (!REG_P (op0) && !SUBREG_P (op0))
+ return INSN_FORM_BAD;
+
+ if (!REG_P (op1) && !SUBREG_P (op1))
+ return INSN_FORM_BAD;
+
+ return INSN_FORM_X;
+ }
+
+ /* Deal with update forms. */
+ if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
+ return INSN_FORM_UPDATE;
+
+ /* Handle PC-relative symbols and labels. Check for both local and
+ external symbols. Assume labels are always local. TLS symbols
+ are not PC-relative for rs6000. */
+ if (TARGET_PCREL)
+ {
+ if (LABEL_REF_P (addr))
+ return INSN_FORM_PCREL_LOCAL;
+
+ if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
+ {
+ if (!SYMBOL_REF_LOCAL_P (addr))
+ return INSN_FORM_PCREL_EXTERNAL;
+ else
+ return INSN_FORM_PCREL_LOCAL;
+ }
+ }
+
+ /* Strip a CONST wrapper to expose the underlying PLUS or LO_SUM. */
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+
+ /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
+ if (GET_CODE (addr) == LO_SUM)
+ return INSN_FORM_LO_SUM;
+
+ /* Everything below must be an offset address of some form. */
+ if (GET_CODE (addr) != PLUS)
+ return INSN_FORM_BAD;
+
+ rtx op0 = XEXP (addr, 0);
+ rtx op1 = XEXP (addr, 1);
+
+ /* Check for indexed addresses. */
+ if (REG_P (op1) || SUBREG_P (op1))
+ {
+ if (REG_P (op0) || SUBREG_P (op0))
+ return INSN_FORM_X;
+
+ return INSN_FORM_BAD;
+ }
+
+ if (!CONST_INT_P (op1))
+ return INSN_FORM_BAD;
+
+ /* All prefixed/non-prefixed offset forms are limited to 34-bit offsets. */
+ HOST_WIDE_INT offset = INTVAL (op1);
+ if (!SIGNED_INTEGER_34BIT_P (offset))
+ return INSN_FORM_BAD;
+
+ /* Check for local and external PC-relative addresses. Labels are always
+ local. TLS symbols are not PC-relative for rs6000. */
+ if (TARGET_PCREL)
+ {
+ if (LABEL_REF_P (op0))
+ return INSN_FORM_PCREL_LOCAL;
+
+ if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
+ {
+ if (!SYMBOL_REF_LOCAL_P (op0))
+ return INSN_FORM_PCREL_EXTERNAL;
+ else
+ return INSN_FORM_PCREL_LOCAL;
+ }
+ }
+
+ /* If it isn't PC-relative, the address must use a base register. */
+ if (!REG_P (op0) && !SUBREG_P (op0))
+ return INSN_FORM_BAD;
+
+ /* Large offsets must be prefixed. */
+ if (!SIGNED_INTEGER_16BIT_P (offset))
+ {
+ if (TARGET_PREFIXED)
+ return INSN_FORM_PREFIXED_NUMERIC;
+
+ return INSN_FORM_BAD;
+ }
+
+ /* We have a 16-bit offset, see what default instruction format to use. */
+ if (non_prefixed_format == NON_PREFIXED_DEFAULT)
+ {
+ unsigned size = GET_MODE_SIZE (mode);
+
+ /* On 64-bit systems, assume 64-bit integers need to use DS form
+ addresses (for LD/STD). VSX vectors need to use DQ form addresses
+ (for LXV and STXV). TImode is problematical in that its normal usage
+ is expected to be GPRs where it wants a DS instruction format, but if
+ it goes into the vector registers, it wants a DQ instruction
+ format. */
+ if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
+ non_prefixed_format = NON_PREFIXED_DS;
+
+ else if (TARGET_VSX && size >= 16
+ && (VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)))
+ non_prefixed_format = NON_PREFIXED_DQ;
+
+ else
+ non_prefixed_format = NON_PREFIXED_D;
+ }
+
+ /* Classify the D/DS/DQ-form addresses. */
+ switch (non_prefixed_format)
+ {
+ /* Instruction format D, all 16 bits are valid. */
+ case NON_PREFIXED_D:
+ return INSN_FORM_D;
+
+ /* Instruction format DS, bottom 2 bits must be 0. */
+ case NON_PREFIXED_DS:
+ if ((offset & 3) == 0)
+ return INSN_FORM_DS;
+
+ else if (TARGET_PREFIXED)
+ return INSN_FORM_PREFIXED_NUMERIC;
+
+ else
+ return INSN_FORM_BAD;
+
+ /* Instruction format DQ, bottom 4 bits must be 0. */
+ case NON_PREFIXED_DQ:
+ if ((offset & 15) == 0)
+ return INSN_FORM_DQ;
+
+ else if (TARGET_PREFIXED)
+ return INSN_FORM_PREFIXED_NUMERIC;
+
+ else
+ return INSN_FORM_BAD;
+
+ default:
+ break;
+ }
+
+ return INSN_FORM_BAD;
+}
+
+/* Helper function to see if we're potentially looking at lfs/stfs.
+ - PARALLEL containing a SET and a CLOBBER
+ - stfs:
+ - SET is from UNSPEC_SI_FROM_SF to MEM:SI
+ - CLOBBER is a V4SF
+ - lfs:
+ - SET is from UNSPEC_SF_FROM_SI to REG:SF
+ - CLOBBER is a DI
+ */
+
+static bool
+is_lfs_stfs_insn (rtx_insn *insn)
+{
+ rtx pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL)
+ return false;
+
+ /* This should be a parallel with exactly one set and one clobber. */
+ if (XVECLEN (pattern, 0) != 2)
+ return false;
+
+ rtx set = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (set) != SET)
+ return false;
+
+ rtx clobber = XVECEXP (pattern, 0, 1);
+ if (GET_CODE (clobber) != CLOBBER)
+ return false;
+
+ /* All we care about is that the destination of the SET is a mem:SI,
+ the source should be an UNSPEC_SI_FROM_SF, and the clobber
+ should be a scratch:V4SF. */
+
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+ rtx scratch = SET_DEST (clobber);
+
+ if (GET_CODE (src) != UNSPEC)
+ return false;
+
+ /* stfs case. */
+ if (XINT (src, 1) == UNSPEC_SI_FROM_SF
+ && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
+ return true;
+
+ /* lfs case. */
+ if (XINT (src, 1) == UNSPEC_SF_FROM_SI
+ && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
+ && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
+ return true;
+
+ return false;
+}
+
+/* Helper function to take a REG and a MODE and turn it into the non-prefixed
+ instruction format (D/DS/DQ) used for offset memory. Returns
+ NON_PREFIXED_DEFAULT if REG is not a hard register (e.g. a pseudo before
+ register allocation), so the caller falls back on the mode-based default. */
+
+static enum non_prefixed_form
+reg_to_non_prefixed (rtx reg, machine_mode mode)
+{
+ /* If it isn't a register, use the defaults. */
+ if (!REG_P (reg) && !SUBREG_P (reg))
+ return NON_PREFIXED_DEFAULT;
+
+ unsigned int r = reg_or_subregno (reg);
+
+ /* If we have a pseudo, use the default instruction format. */
+ if (!HARD_REGISTER_NUM_P (r))
+ return NON_PREFIXED_DEFAULT;
+
+ unsigned size = GET_MODE_SIZE (mode);
+
+ /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
+ 128-bit floating point, and 128-bit integers. Before power9, only indexed
+ addressing was available for vectors. */
+ if (FP_REGNO_P (r))
+ {
+ if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
+ return NON_PREFIXED_D;
+
+ else if (size < 8)
+ return NON_PREFIXED_X;
+
+ else if (TARGET_VSX && size >= 16
+ && (VECTOR_MODE_P (mode)
+ || FLOAT128_VECTOR_P (mode)
+ || mode == TImode || mode == CTImode))
+ return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
+
+ else
+ return NON_PREFIXED_DEFAULT;
+ }
+
+ /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
+ 128-bit floating point, and 128-bit integers. Before power9, only indexed
+ addressing was available. */
+ else if (ALTIVEC_REGNO_P (r))
+ {
+ if (!TARGET_P9_VECTOR)
+ return NON_PREFIXED_X;
+
+ if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
+ return NON_PREFIXED_DS;
+
+ else if (size < 8)
+ return NON_PREFIXED_X;
+
+ else if (TARGET_VSX && size >= 16
+ && (VECTOR_MODE_P (mode)
+ || FLOAT128_VECTOR_P (mode)
+ || mode == TImode || mode == CTImode))
+ return NON_PREFIXED_DQ;
+
+ else
+ return NON_PREFIXED_DEFAULT;
+ }
+
+ /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
+ otherwise. Assume that any other register, such as LR, CRs, etc. will go
+ through the GPR registers for memory operations. */
+ else if (TARGET_POWERPC64 && size >= 8)
+ return NON_PREFIXED_DS;
+
+ return NON_PREFIXED_D;
+}
+
+\f
+/* Whether a load instruction is a prefixed instruction. This is called from
+ the prefixed attribute processing. */
+
+bool
+prefixed_load_p (rtx_insn *insn)
+{
+ /* Validate the insn to make sure it is a normal load insn. */
+ extract_insn_cached (insn);
+ if (recog_data.n_operands < 2)
+ return false;
+
+ rtx reg = recog_data.operand[0];
+ rtx mem = recog_data.operand[1];
+
+ if (!REG_P (reg) && !SUBREG_P (reg))
+ return false;
+
+ if (!MEM_P (mem))
+ return false;
+
+ /* Prefixed load instructions do not support update or indexed forms. */
+ if (get_attr_indexed (insn) == INDEXED_YES
+ || get_attr_update (insn) == UPDATE_YES)
+ return false;
+
+ /* LWA uses the DS format instead of the D format that LWZ uses. */
+ enum non_prefixed_form non_prefixed;
+ machine_mode reg_mode = GET_MODE (reg);
+ machine_mode mem_mode = GET_MODE (mem);
+
+ if (mem_mode == SImode && reg_mode == DImode
+ && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
+ non_prefixed = NON_PREFIXED_DS;
+
+ else
+ non_prefixed = reg_to_non_prefixed (reg, mem_mode);
+
+ /* An lfs pattern classifies as X-form from its register class, but its
+ address can still need a prefixed form; use the default classification
+ for it instead (mirrors the handling in prefixed_store_p). */
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
+ else
+ return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
+}
+
+/* Whether a store instruction is a prefixed instruction. This is called from
+ the prefixed attribute processing. */
+
+bool
+prefixed_store_p (rtx_insn *insn)
+{
+ /* Validate the insn to make sure it is a normal store insn. */
+ extract_insn_cached (insn);
+ if (recog_data.n_operands < 2)
+ return false;
+
+ rtx mem = recog_data.operand[0];
+ rtx reg = recog_data.operand[1];
+
+ if (!REG_P (reg) && !SUBREG_P (reg))
+ return false;
+
+ if (!MEM_P (mem))
+ return false;
+
+ /* Prefixed store instructions do not support update or indexed forms. */
+ if (get_attr_indexed (insn) == INDEXED_YES
+ || get_attr_update (insn) == UPDATE_YES)
+ return false;
+
+ machine_mode mem_mode = GET_MODE (mem);
+ rtx addr = XEXP (mem, 0);
+ enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
+
+ /* Need to make sure we aren't looking at a stfs which doesn't look
+ like the other things reg_to_non_prefixed/address_is_prefixed
+ looks for. */
+ if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+ return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
+ else
+ return address_is_prefixed (addr, mem_mode, non_prefixed);
+}
+
+/* Whether a load immediate or add instruction is a prefixed instruction
+ (i.e. needs PADDI/PLI rather than ADDI/ADDIS/LI/LIS). This is called from
+ the prefixed attribute processing. */
+
+bool
+prefixed_paddi_p (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+ if (!set)
+ return false;
+
+ rtx dest = SET_DEST (set);
+ rtx src = SET_SRC (set);
+
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ /* Is this a load immediate that can't be done with a simple ADDI or
+ ADDIS? */
+ if (CONST_INT_P (src))
+ return (satisfies_constraint_eI (src)
+ && !satisfies_constraint_I (src)
+ && !satisfies_constraint_L (src));
+
+ /* Is this a PADDI instruction that can't be done with a simple ADDI or
+ ADDIS? */
+ if (GET_CODE (src) == PLUS)
+ {
+ rtx op1 = XEXP (src, 1);
+
+ return (CONST_INT_P (op1)
+ && satisfies_constraint_eI (op1)
+ && !satisfies_constraint_I (op1)
+ && !satisfies_constraint_L (op1));
+ }
+
+ /* If not, is it a load of a PC-relative address? */
+ if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
+ return false;
+
+ if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
+ return false;
+
+ enum insn_form iform = address_to_insn_form (src, Pmode,
+ NON_PREFIXED_DEFAULT);
+
+ return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
+}
+
+/* Whether the next instruction needs a 'p' prefix issued before the
+ instruction is printed out. Set by rs6000_final_prescan_insn and
+ consumed by rs6000_asm_output_opcode. */
+static bool next_insn_prefixed_p;
+
+/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
+ outputting the assembler code. On the PowerPC, we remember if the current
+ insn is a prefixed insn where we need to emit a 'p' before the insn.
+
+ In addition, if the insn is part of a PC-relative reference to an external
+ label optimization, this is recorded also. */
+void
+rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
+{
+ next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO);
+ return;
+}
+
+/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
+ We use it to emit a 'p' for prefixed insns that is set in
+ FINAL_PRESCAN_INSN. */
+void
+rs6000_asm_output_opcode (FILE *stream)
+{
+ /* Emit the 'p' prefix recorded for this insn by rs6000_final_prescan_insn,
+ turning e.g. "addi" into "paddi". */
+ if (next_insn_prefixed_p)
+ fprintf (stream, "p");
+
+ return;
+}
+
+/* Adjust the length of an INSN. LENGTH is the currently-computed length and
+ should be adjusted to reflect any required changes. This macro is used when
+ there is some systematic length adjustment required that would be difficult
+ to express in the length attribute.
+
+ In the PowerPC, we use this to adjust the length of an instruction if one or
+ more prefixed instructions are generated, using the attribute
+ num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
+ hardware requires that a prefixed instruction does not cross a 64-byte
+ boundary. This means the compiler has to assume the length of the first
+ prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
+ already set for the non-prefixed instruction, we just need to update for the
+ difference. */
+
+int
+rs6000_adjust_insn_length (rtx_insn *insn, int length)
+{
+ if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
+ {
+ rtx pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
+ && get_attr_prefixed (insn) == PREFIXED_YES)
+ {
+ /* Each prefixed insn adds 4 bytes, plus 4 more for the possible
+ alignment padding before the first one (see comment above). */
+ int num_prefixed = get_attr_max_prefixed_insns (insn);
+ length += 4 * (num_prefixed + 1);
+ }
+ }
+
+ return length;
+}
+
+\f
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
tree fenv_var = create_tmp_var_raw (double_type_node);
TREE_ADDRESSABLE (fenv_var) = 1;
- tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
+ tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
+ build4 (TARGET_EXPR, double_type_node, fenv_var,
+ void_node, NULL_TREE, NULL_TREE));
*hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
*clear = build_call_expr (atomic_clear_decl, 0);
/* Mask to clear everything except for the rounding modes and non-IEEE
arithmetic flag. */
- const unsigned HOST_WIDE_INT hold_exception_mask =
- HOST_WIDE_INT_C (0xffffffff00000007);
+ const unsigned HOST_WIDE_INT hold_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff00000007);
tree fenv_var = create_tmp_var_raw (double_type_node);
- tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
+ tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
+ NULL_TREE, NULL_TREE);
tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
/* Mask to clear everything except for the rounding modes and non-IEEE
arithmetic flag. */
- const unsigned HOST_WIDE_INT clear_exception_mask =
- HOST_WIDE_INT_C (0xffffffff00000000);
+ const unsigned HOST_WIDE_INT clear_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff00000000);
tree fenv_clear = create_tmp_var_raw (double_type_node);
- tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
+ tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
+ call_mffs, NULL_TREE, NULL_TREE);
tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
(*(uint64_t*)fenv_var 0x1ff80fff);
__builtin_mtfsf (0xff, fenv_update); */
- const unsigned HOST_WIDE_INT update_exception_mask =
- HOST_WIDE_INT_C (0xffffffff1fffff00);
- const unsigned HOST_WIDE_INT new_exception_mask =
- HOST_WIDE_INT_C (0x1ff80fff);
+ const unsigned HOST_WIDE_INT update_exception_mask
+ = HOST_WIDE_INT_C (0xffffffff1fffff00);
+ const unsigned HOST_WIDE_INT new_exception_mask
+ = HOST_WIDE_INT_C (0x1ff80fff);
tree old_fenv = create_tmp_var_raw (double_type_node);
- tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
+ tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
+ call_mffs, NULL_TREE, NULL_TREE);
tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
rtx_tmp2 = gen_reg_rtx (V4SFmode);
rtx_tmp3 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
- emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));
+ emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
+ emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
if (BYTES_BIG_ENDIAN)
emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
return true;
}
+/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. Return true if MEM's
+ address must not be substituted for a pseudo's equivalent during reload. */
+
+static bool
+rs6000_cannot_substitute_mem_equiv_p (rtx mem)
+{
+ gcc_assert (MEM_P (mem));
+
+ /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
+ type addresses, so don't allow MEMs with those address types to be
+ substituted as an equivalent expression. See PR93974 for details. */
+ if (GET_CODE (XEXP (mem, 0)) == AND)
+ return true;
+
+ return false;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"