m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
- | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
   on simulation results. But after P4 was made, no performance benefit
/* X86_TUNE_DOUBLE_WITH_ADD */
~m_386,
-
+
/* X86_TUNE_USE_SAHF */
m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_PARTIAL_FLAG_REG_STALL */
m_CORE2 | m_GENERIC,
-
+
/* X86_TUNE_USE_HIMODE_FIOP */
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_MOV0 */
m_K6,
-
+
/* X86_TUNE_USE_CLTD */
~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
/* X86_TUNE_SINGLE_STRINGOP */
m_386 | m_PENT4 | m_NOCONA,
-
+
/* X86_TUNE_QIMODE_MATH */
~0,
-
+
/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
might be considered for Generic32 if our scheme for avoiding partial
m_ATHLON_K8_AMDFAM10,
/* X86_TUNE_USE_INCDEC */
- ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
+ ~(m_PENT4 | m_NOCONA | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
{&core2_cost, 0, 0, 16, 7, 16, 7, 16},
{&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
- {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
+ {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
};
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
static bool
type_has_variadic_args_p (tree type)
{
- tree t;
+ tree n, t = TYPE_ARG_TYPES (type);
- for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
- if (t == void_list_node)
- return false;
- return true;
+ if (t == NULL)
+ return false;
+
+ while ((n = TREE_CHAIN (t)) != NULL)
+ t = n;
+
+ return TREE_VALUE (t) != void_type_node;
}
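/* Editor's illustration (not part of the patch; assumes the usual GCC
   prototype conventions): the TYPE_ARG_TYPES chains the walk above
   distinguishes are

     int f (int);        int_type_node -> void_type_node
     int f (int, ...);   int_type_node -> NULL          (variadic)
     int f ();           NULL TYPE_ARG_TYPES            (unprototyped)

   Only the middle shape makes the function return true: the loop stops
   at the last TREE_LIST node and tests whether its TREE_VALUE is
   void_type_node.  */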
/* Value is the number of bytes of arguments automatically
|| (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
regno = TARGET_SSE ? FIRST_SSE_REG : 0;
- /* Decimal floating point values can go in %eax, unlike other float modes. */
- else if (DECIMAL_FLOAT_MODE_P (mode))
- regno = 0;
-
- /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
- else if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
+ /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
+ else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
+ regno = FIRST_FLOAT_REG;
+ else
+ /* Most things go in %eax. */
regno = 0;
-
- /* Floating point return values in %st(0), except for local functions when
+
+ /* Override FP return register with %xmm0 for local functions when
   SSE math is enabled or for functions with the sseregparm attribute. */
- else
+ if ((fn || fntype) && (mode == SFmode || mode == DFmode))
{
- regno = FIRST_FLOAT_REG;
-
- if ((fn || fntype) && (mode == SFmode || mode == DFmode))
- {
- int sse_level = ix86_function_sseregparm (fntype, fn);
- if ((sse_level >= 1 && mode == SFmode)
- || (sse_level == 2 && mode == DFmode))
- regno = FIRST_SSE_REG;
- }
+ int sse_level = ix86_function_sseregparm (fntype, fn);
+ if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
+ regno = FIRST_SSE_REG;
}
return gen_rtx_REG (orig_mode, regno);
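/* Editor's illustration (mine, not the patch's): for a 32-bit
   `double f (void)' the default return register selected above is
   %st(0); a local function compiled with -mfpmath=sse (and SSE2), or
   one carrying the sseregparm attribute, gets %xmm0 via the override;
   and with -mno-fp-ret-in-387 the value comes back in the integer
   registers instead.  */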
return return_in_memory_32 (type, mode);
}
+/* Return nonzero iff TYPE is returned in memory.  This version is used
+ on Solaris 10. It is similar to the generic ix86_return_in_memory,
+ but differs notably in that when MMX is available, 8-byte vectors
+ are returned in memory, rather than in MMX registers. */
+
+int
+ix86_sol10_return_in_memory (tree type)
+{
+ int size;
+ enum machine_mode mode = type_natural_mode (type);
+
+ if (TARGET_64BIT)
+ return return_in_memory_64 (type, mode);
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Return in memory only if MMX registers *are* available. This
+ seems backwards, but it is consistent with the existing
+ Solaris x86 ABI. */
+ if (size == 8)
+ return TARGET_MMX;
+ if (size == 16)
+ return !TARGET_SSE;
+ }
+ else if (mode == TImode)
+ return !TARGET_SSE;
+ else if (mode == XFmode)
+ return 0;
+
+ return size > 12;
+}
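/* Editor's worked examples for the rules above (assumptions mine,
   following the usual i386 mode conventions):

     8-byte vector (e.g. V8QI)    in memory iff -mmmx is enabled
     16-byte vector (e.g. V4SF)   in memory iff -msse is disabled
     long double (XFmode)         never in memory, returned in %st(0)
     _Complex float (SCmode)      8 bytes <= 12, in registers
     _Complex double (DCmode)     16 bytes > 12, in memory  */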
+
/* When returning SSE vector types, we have a choice of either
   (1) being ABI-incompatible with a -march switch, or
(2) generating an error.
int
standard_80387_constant_p (rtx x)
{
+ enum machine_mode mode = GET_MODE (x);
+
REAL_VALUE_TYPE r;
- if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
+ if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
return -1;
- if (x == CONST0_RTX (GET_MODE (x)))
+ if (x == CONST0_RTX (mode))
return 1;
- if (x == CONST1_RTX (GET_MODE (x)))
+ if (x == CONST1_RTX (mode))
return 2;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
/* For XFmode constants, try to find a special 80387 instruction when
optimizing for size or on those CPUs that benefit from them. */
- if (GET_MODE (x) == XFmode
+ if (mode == XFmode
&& (optimize_size || TARGET_EXT_80387_CONSTANTS))
{
int i;
gcc_assert (STACK_TOP_P (operands[1]));
gcc_assert (MEM_P (operands[0]));
+ gcc_assert (GET_MODE (operands[1]) != TFmode);
if (fisttp)
output_asm_insn ("fisttp%z0\t%0", operands);
{
static char retval[] = ".word\t0xc_df";
int regno = REGNO (operands[opno]);
-
+
gcc_assert (FP_REGNO_P (regno));
retval[9] = '0' + (regno - FIRST_STACK_REG);
movlpd mem, reg (gas syntax)
else
movsd mem, reg
-
+
Code generation for unaligned packed loads of single precision data
(x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
if (x86_sse_unaligned_move_optimal)
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
-
+
static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
rtx operands[])
{
REAL_VALUE_TYPE TWO32r;
rtx fp_lo, fp_hi, x;
-
+
fp_lo = gen_reg_rtx (DFmode);
fp_hi = gen_reg_rtx (DFmode);
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
- if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
- return ix86_fp_compare_mode (code);
+ enum machine_mode mode = GET_MODE (op0);
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+ return ix86_fp_compare_mode (code);
+ }
+
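/* Editor's note (my reading, not from the patch): decimal float
   comparisons are expanded through libgcc calls rather than condition
   codes, so they should never reach this point; the new assert merely
   documents that invariant.  */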
switch (code)
{
/* Only zero flag is needed. */
ix86_compare_emitted = NULL_RTX;
}
else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
- ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
- second_test, bypass_test);
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ second_test, bypass_test);
+ }
else
ret = ix86_expand_int_compare (code, op0, op1);
enum machine_mode mode =
GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
- /* Do not handle DImode compares that go through special path. Also we can't
- deal with FP compares yet. This is possible to add. */
+ /* Do not handle DImode compares that go through a special path.
+ Also, we can't deal with FP compares yet; this could be added. */
if (mode == (TARGET_64BIT ? TImode : DImode))
return false;
- if (FLOAT_MODE_P (mode))
+
+ if (SCALAR_FLOAT_MODE_P (mode))
{
rtx second_test = NULL, bypass_test = NULL;
rtx compare_op, compare_seq;
- /* Shortcut: following common codes never translate into carry flag compares. */
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+
+ /* Shortcut: the following common codes never translate
+ into carry flag compares. */
if (code == EQ || code == NE || code == UNEQ || code == LTGT
|| code == ORDERED || code == UNORDERED)
return false;
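/* Editor's illustration (mine): codes such as LTU and GEU map directly
   onto the carry flag (the sbb/adc style sequences this function looks
   for), while equality and (un)ordered tests need ZF or PF and so can
   never take the carry-flag path.  */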
if (diff < 0)
{
+ enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+
HOST_WIDE_INT tmp;
tmp = ct, ct = cf, cf = tmp;
diff = -diff;
- if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
{
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
	  /* We may be reversing an unordered compare to a normal compare, which
	     is not valid in general (we may convert a non-trapping condition
	     to a trapping one); however, on i386 we currently emit all
{
if (cf == 0)
{
+ enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+
cf = ct;
ct = 0;
- if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
- /* We may be reversing unordered compare to normal compare,
- that is not valid in general (we may convert non-trapping
- condition to trapping one), however on i386 we currently
- emit all comparisons unordered. */
- code = reverse_condition_maybe_unordered (code);
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
+ /* We may be reversing an unordered compare to a normal compare,
+ which is not valid in general (we may convert a non-trapping
+ condition to a trapping one); however, on i386 we currently
+ emit all comparisons unordered. */
+ code = reverse_condition_maybe_unordered (code);
+ }
else
{
code = reverse_condition (code);
case V4SImode:
if (high_p)
unpack = gen_vec_interleave_highv4si;
- else
+ else
unpack = gen_vec_interleave_lowv4si;
break;
default:
- gcc_unreachable ();
+ gcc_unreachable ();
}
dest = gen_lowpart (imode, operands[0]);
return sc;
}
-/* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
- for constant loop counts. */
+/* Return mode for the memcpy/memset loop counter. Prefer SImode over
+ DImode for constant loop counts. */
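/* Editor's example (mine): for a constant count such as
   memcpy (dst, src, 100) on a 64-bit target, an SImode counter lets
   the copy loop use 32-bit induction arithmetic; non-constant counts
   keep the pointer-width mode.  */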
static enum machine_mode
counter_mode (rtx count_exp)
   The size is rounded down to a whole multiple of the chunk size moved at once.
SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
-
+
static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
- we can save registers by using single temporary.
+ we can save registers by using a single temporary.
   Also, using four temporaries is overkill in 32-bit mode. */
if (!TARGET_64BIT && 0)
{
emit_label (out_label);
}
-/* Output "rep; mov" instruction.
+/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function. */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
destexp, srcexp));
}
-/* Output "rep; stos" instruction.
+/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function. */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand, and
   take blocks of at most that size, guessing that the average size will
- be roughly half of the block.
+ be roughly half of the block.
If this turns out to be bad, we might simply specify the preferred
choice in ix86_costs. */
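/* Editor's worked example (numbers mine): if hand-written copies win
   for blocks of up to 64 bytes, we inline calls with a size bound of
   64 and tune the generated code for an expected size of about 32
   bytes.  */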
   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to the size guarded by the prologue guard). */
-
+
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
rtx expected_align_exp, rtx expected_size_exp)
while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
bytes. Compensate if needed. */
-
+
if (size_needed < epilogue_size_needed)
{
tmp =
mode = DImode;
count_exp = force_reg (mode, count_exp);
}
- /* Do the cheap promotion to allow better CSE across the
+ /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e., one load of the big constant in
     front of all the code). */
if (CONST_INT_P (val_exp))
BUILTIN_DESC_SWAP_OPERANDS },
{ MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
BUILTIN_DESC_SWAP_OPERANDS },
- { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
+ { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
{ MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
{ MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
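/* Editor's note on the CMPORDSS fix above (intrinsic mapping assumed
   from the usual <xmmintrin.h> headers): _mm_cmpord_ss tests that
   neither operand is a NaN, i.e. an ORDERED comparison, so for

     __m128 m = _mm_cmpord_ss (a, b);

   element 0 must be all-ones exactly when neither a[0] nor b[0] is a
   NaN; the old UNORDERED mapping produced the inverted mask.  */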
IX86_BUILTIN_PALIGNR);
  /* AMDFAM10 SSE4A new built-ins.  */
- def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
- def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
+ def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
- def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
+ def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
enum machine_mode tmode, mode1;
tree arg0, arg1, arg2;
int elt;
- rtx op0, op1;
+ rtx op0, op1, target;
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
op0 = force_reg (tmode, op0);
op1 = force_reg (mode1, op1);
- ix86_expand_vector_set (true, op0, op1, elt);
+ /* OP0 is the source of these builtin functions and shouldn't be
+ modified. Create a copy, use it, and return it as the target. */
+ target = gen_reg_rtx (tmode);
+ emit_move_insn (target, op0);
+ ix86_expand_vector_set (true, target, op1, elt);
- return op0;
+ return target;
}
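/* Editor's illustration of the bug fixed above (intrinsic mapping
   assumed): returning OP0 made the insertion clobber the register that
   still backs the user's first argument, so in code such as

     __m128i v = _mm_insert_epi16 (u, x, 0);

   later uses of U could see the modified value.  Copying into a fresh
   TARGET keeps U intact and returns the updated copy.  */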
/* Expand an expression EXP that calls a built-in function,
{
if (TREE_CODE (type) != VECTOR_TYPE)
return NULL_TREE;
-
+
switch (code)
{
case FLOAT_EXPR:
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
- if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
+ if (X87_FLOAT_MODE_P (mode))
{
if (class == FP_TOP_SSE_REGS)
return FP_TOP_REG;
return false;
case MULT:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
+ /* ??? SSE scalar cost should be used here. */
+ *total = ix86_cost->fmul;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fmul;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fmul;
return false;
}
case UDIV:
case MOD:
case UMOD:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = ix86_cost->fdiv;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = ix86_cost->fdiv;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fdiv;
else
*total = ix86_cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
- if (FLOAT_MODE_P (mode))
- *total = ix86_cost->fadd;
- else if (GET_MODE_CLASS (mode) == MODE_INT
+ if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
{
if (GET_CODE (XEXP (x, 0)) == PLUS
/* FALLTHRU */
case MINUS:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
+ /* ??? SSE cost should be used here. */
+ *total = ix86_cost->fadd;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fadd;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fadd;
return false;
}
/* FALLTHRU */
case NEG:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
+ /* ??? SSE cost should be used here. */
+ *total = ix86_cost->fchs;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = ix86_cost->fchs;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fchs;
return false;
}
return false;
case FLOAT_EXTEND:
- if (!TARGET_SSE_MATH
- || mode == XFmode
- || (mode == DFmode && !TARGET_SSE2))
+ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
*total = 0;
return false;
case ABS:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = ix86_cost->fabs;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = ix86_cost->fabs;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fabs;
return false;
case SQRT:
- if (FLOAT_MODE_P (mode))
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = ix86_cost->fsqrt;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = ix86_cost->fsqrt;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
*total = ix86_cost->fsqrt;
return false;
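/* Editor's note (mine): with -mfpmath=sse on an SSE2 target, scalar
   SFmode/DFmode MULT, DIV, PLUS/MINUS, NEG, ABS and SQRT now take the
   SSE_FLOAT_MODE_P arms above; the ??? markers record that these still
   borrow the x87 cost fields until dedicated SSE scalar and vector
   costs are added.  */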
return clobbers;
}
-/* Implementes target vector targetm.asm.encode_section_info. This
+/* Implements target vector targetm.asm.encode_section_info. This
   is not used by NetWare. */
static void ATTRIBUTE_UNUSED