#include "tm-constrs.h"
#include "params.h"
+static int x86_builtin_vectorization_cost (bool);
+
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Processor costs (relative to an add) */
DUMMY_STRINGOP_ALGS},
{{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
{{libcall, {{-1, rep_prefix_4_byte}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
DUMMY_STRINGOP_ALGS},
{{rep_prefix_4_byte, {{1024, unrolled_loop},
{8192, rep_prefix_4_byte}, {-1, libcall}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
{{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
{{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
- DUMMY_STRINGOP_ALGS}
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
- {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 6, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
{libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{{libcall, {{8, loop}, {24, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
- {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 6, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
{-1, libcall}}},
{libcall, {{24, loop}, {64, unrolled_loop},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}}
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
static const
{{libcall, {{8, loop}, {15, unrolled_loop},
{2048, rep_prefix_4_byte}, {-1, libcall}}},
{libcall, {{24, loop}, {32, unrolled_loop},
- {8192, rep_prefix_8_byte}, {-1, libcall}}}}
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Generic64 should produce code tuned for Nocona and K8. */
{DUMMY_STRINGOP_ALGS,
{libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
{DUMMY_STRINGOP_ALGS,
- {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
+ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
DUMMY_STRINGOP_ALGS},
{{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
+#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
negatively, so enabling for Generic64 seems like good code size
tradeoff. We can't enable it for 32bit generic because it does not
work well with PPro base chips. */
- m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
/* X86_TUNE_PUSH_MEMORY */
- m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
m_386,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
+ m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
+ m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
+ m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
+ m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
| m_GENERIC /* | m_PENT4 ? */,
/* X86_TUNE_FAST_PREFIX */
m_PPRO,
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
- m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_ADD_ESP_8 */
- m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
+ m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_4 */
- m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_8 */
- m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
+ m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC | m_GEODE),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
m_ATHLON_K8,
/* X86_TUNE_SSE_TYPELESS_STORES */
- m_ATHLON_K8_AMDFAM10,
+ m_AMD_MULTIPLE,
/* X86_TUNE_SSE_LOAD0_BY_PXOR */
m_PPRO | m_PENT4 | m_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
~m_486,
/* X86_TUNE_USE_FFREEP */
- m_ATHLON_K8_AMDFAM10,
+ m_AMD_MULTIPLE,
/* X86_TUNE_INTER_UNIT_MOVES */
- ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
+ ~(m_AMD_MULTIPLE | m_GENERIC),
+
+ /* X86_TUNE_INTER_UNIT_CONVERSIONS */
+ ~(m_AMDFAM10),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_ATHLON_K8_AMDFAM10,
+ m_AMD_MULTIPLE,
/* X86_TUNE_USE_INCDEC */
~(m_PENT4 | m_NOCONA | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
- m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
/* X86_TUNE_EXT_80387_CONSTANTS */
m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
operand that cannot be represented using a modRM byte. The XOR
replacement is long decoded, so this split helps here as well. */
m_K6,
+
+ /* X86_TUNE_USE_VECTOR_CONVERTS: Preffer vector packed SSE conversion
+ from integer to FP. */
+ m_AMDFAM10,
};
/* Feature tests against the various architecture variations. */
};
static const unsigned int x86_accumulate_outgoing_args
- = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+ = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
+ = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
static enum stringop_alg stringop_alg = no_stringop;
#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
+#define OPTION_MASK_ISA_SSE5_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
+
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
}
return true;
+ case OPT_msse5:
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
+ if (!value)
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
+ }
+ return true;
+
default:
return true;
}
PTA_SSE4A = 1 << 12,
PTA_NO_SAHF = 1 << 13,
PTA_SSE4_1 = 1 << 14,
- PTA_SSE4_2 = 1 << 15
+ PTA_SSE4_2 = 1 << 15,
+ PTA_SSE5 = 1 << 16
};
static struct pta
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
+ if (processor_alias_table[i].flags & PTA_SSE5
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
if (processor_alias_table[i].flags & PTA_ABM)
x86_abm = true;
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
+ /* Turn on SSE4A bultins for -msse5. */
+ if (TARGET_SSE5)
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
+
/* Turn on SSE4.1 builtins for -msse4.2. */
if (TARGET_SSE4_2)
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
-/* Return true if TYPE has a variable argument list. */
-
-static bool
-type_has_variadic_args_p (tree type)
-{
- tree n, t = TYPE_ARG_TYPES (type);
-
- if (t == NULL)
- return false;
-
- while ((n = TREE_CHAIN (t)) != NULL)
- t = n;
-
- return TREE_VALUE (t) != void_type_node;
-}
-
/* Value is the number of bytes of arguments automatically
popped when returning from a subroutine call.
FUNDECL is the declaration node of the function (as a tree),
|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
- if (rtd && ! type_has_variadic_args_p (funtype))
+ if (rtd && ! stdarg_p (funtype))
return size;
}
cum->warn_sse = true;
cum->warn_mmx = true;
cum->maybe_vaarg = (fntype
- ? (!TYPE_ARG_TYPES (fntype)
- || type_has_variadic_args_p (fntype))
+ ? (!prototype_p (fntype) || stdarg_p (fntype))
: !libname);
if (!TARGET_64BIT)
{
CUMULATIVE_ARGS next_cum;
tree fntype;
- int stdarg_p;
/* This argument doesn't appear to be used anymore. Which is good,
because the old code here didn't suppress rtl generation. */
return;
fntype = TREE_TYPE (current_function_decl);
- stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
- && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
- != void_type_node));
/* For varargs, we do not want to skip the dummy va_dcl argument.
For stdargs, we do want to skip the last named argument. */
next_cum = *cum;
- if (stdarg_p)
+ if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
if (TARGET_64BIT_MS_ABI)
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
name = targetm.strip_name_encoding (name);
- if (name[0] == FASTCALL_PREFIX)
- {
- name++;
- prefix = "*__imp_";
- }
- else
- prefix = "*__imp__";
-
+ prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
namelen = strlen (name);
prefixlen = strlen (prefix);
imp_name = (char *) alloca (namelen + prefixlen + 1);
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
+ Y -- print condition for SSE5 com* instruction.
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
*/
return;
}
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ fputs ("neq", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case GE:
+ case GEU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
+ break;
+ case GT:
+ case GTU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ case UNEQ:
+ fputs ("ueq", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case UNLE:
+ fputs ("ule", file);
+ break;
+ case UNLT:
+ fputs ("ult", file);
+ break;
+ case LTGT:
+ fputs ("une", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return;
+
case ';':
#if TARGET_MACHO
fputs (" ; ", file);
enum machine_mode mode = GET_MODE (dest);
rtx t2, t3, x;
- if (op_false == CONST0_RTX (mode))
+ if (TARGET_SSE5)
+ {
+ rtx pcmov = gen_rtx_SET (mode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false));
+ emit_insn (pcmov);
+ }
+ else if (op_false == CONST0_RTX (mode))
{
op_true = force_reg (mode, op_true);
x = gen_rtx_AND (mode, cmp, op_true);
emit_insn (unpack (dest, src));
}
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with amdfam15 instructions. */
+
+#define PPERM_SRC 0x00 /* copy source */
+#define PPERM_INVERT 0x20 /* invert source */
+#define PPERM_REVERSE 0x40 /* bit reverse source */
+#define PPERM_REV_INV 0x60 /* bit reverse & invert src */
+#define PPERM_ZERO 0x80 /* all 0's */
+#define PPERM_ONES 0xa0 /* all 1's */
+#define PPERM_SIGN 0xc0 /* propigate sign bit */
+#define PPERM_INV_SIGN 0xe0 /* invert & propigate sign */
+
+#define PPERM_SRC1 0x00 /* use first source byte */
+#define PPERM_SRC2 0x10 /* use second source byte */
+
+void
+ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ int pperm_bytes[16];
+ int i;
+ int h = (high_p) ? 8 : 0;
+ int h2;
+ int sign_extend;
+ rtvec v = rtvec_alloc (16);
+ rtvec vs;
+ rtx x, p;
+ rtx op0 = operands[0], op1 = operands[1];
+
+ switch (imode)
+ {
+ case V16QImode:
+ vs = rtvec_alloc (8);
+ h2 = (high_p) ? 8 : 0;
+ for (i = 0; i < 8; i++)
+ {
+ pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
+ pperm_bytes[2*i+1] = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | i | h);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 8; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
+ break;
+
+ case V8HImode:
+ vs = rtvec_alloc (4);
+ h2 = (high_p) ? 4 : 0;
+ for (i = 0; i < 4; i++)
+ {
+ sign_extend = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
+ pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
+ pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
+ pperm_bytes[4*i+2] = sign_extend;
+ pperm_bytes[4*i+3] = sign_extend;
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 4; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
+ break;
+
+ case V4SImode:
+ vs = rtvec_alloc (2);
+ h2 = (high_p) ? 2 : 0;
+ for (i = 0; i < 2; i++)
+ {
+ sign_extend = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
+ pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
+ pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
+ pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
+ pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
+ pperm_bytes[8*i+4] = sign_extend;
+ pperm_bytes[8*i+5] = sign_extend;
+ pperm_bytes[8*i+6] = sign_extend;
+ pperm_bytes[8*i+7] = sign_extend;
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 4; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
+/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
+ next narrower integer vector type */
+void
+ix86_expand_sse5_pack (rtx operands[3])
+{
+ enum machine_mode imode = GET_MODE (operands[0]);
+ int pperm_bytes[16];
+ int i;
+ rtvec v = rtvec_alloc (16);
+ rtx x;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
+ switch (imode)
+ {
+ case V16QImode:
+ for (i = 0; i < 8; i++)
+ {
+ pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
+ pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
+ break;
+
+ case V8HImode:
+ for (i = 0; i < 4; i++)
+ {
+ pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
+ pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
+ pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
+ pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
+ break;
+
+ case V4SImode:
+ for (i = 0; i < 2; i++)
+ {
+ pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
+ pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
+ pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
+ pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
+ pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
+ pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
+ pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
+ pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
IX86_BUILTIN_FABSQ,
IX86_BUILTIN_COPYSIGNQ,
+ /* SSE5 instructions */
+ IX86_BUILTIN_FMADDSS,
+ IX86_BUILTIN_FMADDSD,
+ IX86_BUILTIN_FMADDPS,
+ IX86_BUILTIN_FMADDPD,
+ IX86_BUILTIN_FMSUBSS,
+ IX86_BUILTIN_FMSUBSD,
+ IX86_BUILTIN_FMSUBPS,
+ IX86_BUILTIN_FMSUBPD,
+ IX86_BUILTIN_FNMADDSS,
+ IX86_BUILTIN_FNMADDSD,
+ IX86_BUILTIN_FNMADDPS,
+ IX86_BUILTIN_FNMADDPD,
+ IX86_BUILTIN_FNMSUBSS,
+ IX86_BUILTIN_FNMSUBSD,
+ IX86_BUILTIN_FNMSUBPS,
+ IX86_BUILTIN_FNMSUBPD,
+ IX86_BUILTIN_PCMOV_V2DI,
+ IX86_BUILTIN_PCMOV_V4SI,
+ IX86_BUILTIN_PCMOV_V8HI,
+ IX86_BUILTIN_PCMOV_V16QI,
+ IX86_BUILTIN_PCMOV_V4SF,
+ IX86_BUILTIN_PCMOV_V2DF,
+ IX86_BUILTIN_PPERM,
+ IX86_BUILTIN_PERMPS,
+ IX86_BUILTIN_PERMPD,
+ IX86_BUILTIN_PMACSSWW,
+ IX86_BUILTIN_PMACSWW,
+ IX86_BUILTIN_PMACSSWD,
+ IX86_BUILTIN_PMACSWD,
+ IX86_BUILTIN_PMACSSDD,
+ IX86_BUILTIN_PMACSDD,
+ IX86_BUILTIN_PMACSSDQL,
+ IX86_BUILTIN_PMACSSDQH,
+ IX86_BUILTIN_PMACSDQL,
+ IX86_BUILTIN_PMACSDQH,
+ IX86_BUILTIN_PMADCSSWD,
+ IX86_BUILTIN_PMADCSWD,
+ IX86_BUILTIN_PHADDBW,
+ IX86_BUILTIN_PHADDBD,
+ IX86_BUILTIN_PHADDBQ,
+ IX86_BUILTIN_PHADDWD,
+ IX86_BUILTIN_PHADDWQ,
+ IX86_BUILTIN_PHADDDQ,
+ IX86_BUILTIN_PHADDUBW,
+ IX86_BUILTIN_PHADDUBD,
+ IX86_BUILTIN_PHADDUBQ,
+ IX86_BUILTIN_PHADDUWD,
+ IX86_BUILTIN_PHADDUWQ,
+ IX86_BUILTIN_PHADDUDQ,
+ IX86_BUILTIN_PHSUBBW,
+ IX86_BUILTIN_PHSUBWD,
+ IX86_BUILTIN_PHSUBDQ,
+ IX86_BUILTIN_PROTB,
+ IX86_BUILTIN_PROTW,
+ IX86_BUILTIN_PROTD,
+ IX86_BUILTIN_PROTQ,
+ IX86_BUILTIN_PROTB_IMM,
+ IX86_BUILTIN_PROTW_IMM,
+ IX86_BUILTIN_PROTD_IMM,
+ IX86_BUILTIN_PROTQ_IMM,
+ IX86_BUILTIN_PSHLB,
+ IX86_BUILTIN_PSHLW,
+ IX86_BUILTIN_PSHLD,
+ IX86_BUILTIN_PSHLQ,
+ IX86_BUILTIN_PSHAB,
+ IX86_BUILTIN_PSHAW,
+ IX86_BUILTIN_PSHAD,
+ IX86_BUILTIN_PSHAQ,
+ IX86_BUILTIN_FRCZSS,
+ IX86_BUILTIN_FRCZSD,
+ IX86_BUILTIN_FRCZPS,
+ IX86_BUILTIN_FRCZPD,
+ IX86_BUILTIN_CVTPH2PS,
+ IX86_BUILTIN_CVTPS2PH,
+
+ IX86_BUILTIN_COMEQSS,
+ IX86_BUILTIN_COMNESS,
+ IX86_BUILTIN_COMLTSS,
+ IX86_BUILTIN_COMLESS,
+ IX86_BUILTIN_COMGTSS,
+ IX86_BUILTIN_COMGESS,
+ IX86_BUILTIN_COMUEQSS,
+ IX86_BUILTIN_COMUNESS,
+ IX86_BUILTIN_COMULTSS,
+ IX86_BUILTIN_COMULESS,
+ IX86_BUILTIN_COMUGTSS,
+ IX86_BUILTIN_COMUGESS,
+ IX86_BUILTIN_COMORDSS,
+ IX86_BUILTIN_COMUNORDSS,
+ IX86_BUILTIN_COMFALSESS,
+ IX86_BUILTIN_COMTRUESS,
+
+ IX86_BUILTIN_COMEQSD,
+ IX86_BUILTIN_COMNESD,
+ IX86_BUILTIN_COMLTSD,
+ IX86_BUILTIN_COMLESD,
+ IX86_BUILTIN_COMGTSD,
+ IX86_BUILTIN_COMGESD,
+ IX86_BUILTIN_COMUEQSD,
+ IX86_BUILTIN_COMUNESD,
+ IX86_BUILTIN_COMULTSD,
+ IX86_BUILTIN_COMULESD,
+ IX86_BUILTIN_COMUGTSD,
+ IX86_BUILTIN_COMUGESD,
+ IX86_BUILTIN_COMORDSD,
+ IX86_BUILTIN_COMUNORDSD,
+ IX86_BUILTIN_COMFALSESD,
+ IX86_BUILTIN_COMTRUESD,
+
+ IX86_BUILTIN_COMEQPS,
+ IX86_BUILTIN_COMNEPS,
+ IX86_BUILTIN_COMLTPS,
+ IX86_BUILTIN_COMLEPS,
+ IX86_BUILTIN_COMGTPS,
+ IX86_BUILTIN_COMGEPS,
+ IX86_BUILTIN_COMUEQPS,
+ IX86_BUILTIN_COMUNEPS,
+ IX86_BUILTIN_COMULTPS,
+ IX86_BUILTIN_COMULEPS,
+ IX86_BUILTIN_COMUGTPS,
+ IX86_BUILTIN_COMUGEPS,
+ IX86_BUILTIN_COMORDPS,
+ IX86_BUILTIN_COMUNORDPS,
+ IX86_BUILTIN_COMFALSEPS,
+ IX86_BUILTIN_COMTRUEPS,
+
+ IX86_BUILTIN_COMEQPD,
+ IX86_BUILTIN_COMNEPD,
+ IX86_BUILTIN_COMLTPD,
+ IX86_BUILTIN_COMLEPD,
+ IX86_BUILTIN_COMGTPD,
+ IX86_BUILTIN_COMGEPD,
+ IX86_BUILTIN_COMUEQPD,
+ IX86_BUILTIN_COMUNEPD,
+ IX86_BUILTIN_COMULTPD,
+ IX86_BUILTIN_COMULEPD,
+ IX86_BUILTIN_COMUGTPD,
+ IX86_BUILTIN_COMUGEPD,
+ IX86_BUILTIN_COMORDPD,
+ IX86_BUILTIN_COMUNORDPD,
+ IX86_BUILTIN_COMFALSEPD,
+ IX86_BUILTIN_COMTRUEPD,
+
+ IX86_BUILTIN_PCOMEQUB,
+ IX86_BUILTIN_PCOMNEUB,
+ IX86_BUILTIN_PCOMLTUB,
+ IX86_BUILTIN_PCOMLEUB,
+ IX86_BUILTIN_PCOMGTUB,
+ IX86_BUILTIN_PCOMGEUB,
+ IX86_BUILTIN_PCOMFALSEUB,
+ IX86_BUILTIN_PCOMTRUEUB,
+ IX86_BUILTIN_PCOMEQUW,
+ IX86_BUILTIN_PCOMNEUW,
+ IX86_BUILTIN_PCOMLTUW,
+ IX86_BUILTIN_PCOMLEUW,
+ IX86_BUILTIN_PCOMGTUW,
+ IX86_BUILTIN_PCOMGEUW,
+ IX86_BUILTIN_PCOMFALSEUW,
+ IX86_BUILTIN_PCOMTRUEUW,
+ IX86_BUILTIN_PCOMEQUD,
+ IX86_BUILTIN_PCOMNEUD,
+ IX86_BUILTIN_PCOMLTUD,
+ IX86_BUILTIN_PCOMLEUD,
+ IX86_BUILTIN_PCOMGTUD,
+ IX86_BUILTIN_PCOMGEUD,
+ IX86_BUILTIN_PCOMFALSEUD,
+ IX86_BUILTIN_PCOMTRUEUD,
+ IX86_BUILTIN_PCOMEQUQ,
+ IX86_BUILTIN_PCOMNEUQ,
+ IX86_BUILTIN_PCOMLTUQ,
+ IX86_BUILTIN_PCOMLEUQ,
+ IX86_BUILTIN_PCOMGTUQ,
+ IX86_BUILTIN_PCOMGEUQ,
+ IX86_BUILTIN_PCOMFALSEUQ,
+ IX86_BUILTIN_PCOMTRUEUQ,
+
+ IX86_BUILTIN_PCOMEQB,
+ IX86_BUILTIN_PCOMNEB,
+ IX86_BUILTIN_PCOMLTB,
+ IX86_BUILTIN_PCOMLEB,
+ IX86_BUILTIN_PCOMGTB,
+ IX86_BUILTIN_PCOMGEB,
+ IX86_BUILTIN_PCOMFALSEB,
+ IX86_BUILTIN_PCOMTRUEB,
+ IX86_BUILTIN_PCOMEQW,
+ IX86_BUILTIN_PCOMNEW,
+ IX86_BUILTIN_PCOMLTW,
+ IX86_BUILTIN_PCOMLEW,
+ IX86_BUILTIN_PCOMGTW,
+ IX86_BUILTIN_PCOMGEW,
+ IX86_BUILTIN_PCOMFALSEW,
+ IX86_BUILTIN_PCOMTRUEW,
+ IX86_BUILTIN_PCOMEQD,
+ IX86_BUILTIN_PCOMNED,
+ IX86_BUILTIN_PCOMLTD,
+ IX86_BUILTIN_PCOMLED,
+ IX86_BUILTIN_PCOMGTD,
+ IX86_BUILTIN_PCOMGED,
+ IX86_BUILTIN_PCOMFALSED,
+ IX86_BUILTIN_PCOMTRUED,
+ IX86_BUILTIN_PCOMEQQ,
+ IX86_BUILTIN_PCOMNEQ,
+ IX86_BUILTIN_PCOMLTQ,
+ IX86_BUILTIN_PCOMLEQ,
+ IX86_BUILTIN_PCOMGTQ,
+ IX86_BUILTIN_PCOMGEQ,
+ IX86_BUILTIN_PCOMFALSEQ,
+ IX86_BUILTIN_PCOMTRUEQ,
+
IX86_BUILTIN_MAX
};
static const struct builtin_description bdesc_ptest[] =
{
/* SSE4.1 */
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
};
static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
};
+/* SSE5 */
+enum multi_arg_type {
+ MULTI_ARG_UNKNOWN,
+ MULTI_ARG_3_SF,
+ MULTI_ARG_3_DF,
+ MULTI_ARG_3_DI,
+ MULTI_ARG_3_SI,
+ MULTI_ARG_3_SI_DI,
+ MULTI_ARG_3_HI,
+ MULTI_ARG_3_HI_SI,
+ MULTI_ARG_3_QI,
+ MULTI_ARG_3_PERMPS,
+ MULTI_ARG_3_PERMPD,
+ MULTI_ARG_2_SF,
+ MULTI_ARG_2_DF,
+ MULTI_ARG_2_DI,
+ MULTI_ARG_2_SI,
+ MULTI_ARG_2_HI,
+ MULTI_ARG_2_QI,
+ MULTI_ARG_2_DI_IMM,
+ MULTI_ARG_2_SI_IMM,
+ MULTI_ARG_2_HI_IMM,
+ MULTI_ARG_2_QI_IMM,
+ MULTI_ARG_2_SF_CMP,
+ MULTI_ARG_2_DF_CMP,
+ MULTI_ARG_2_DI_CMP,
+ MULTI_ARG_2_SI_CMP,
+ MULTI_ARG_2_HI_CMP,
+ MULTI_ARG_2_QI_CMP,
+ MULTI_ARG_2_DI_TF,
+ MULTI_ARG_2_SI_TF,
+ MULTI_ARG_2_HI_TF,
+ MULTI_ARG_2_QI_TF,
+ MULTI_ARG_2_SF_TF,
+ MULTI_ARG_2_DF_TF,
+ MULTI_ARG_1_SF,
+ MULTI_ARG_1_DF,
+ MULTI_ARG_1_DI,
+ MULTI_ARG_1_SI,
+ MULTI_ARG_1_HI,
+ MULTI_ARG_1_QI,
+ MULTI_ARG_1_SI_DI,
+ MULTI_ARG_1_HI_DI,
+ MULTI_ARG_1_HI_SI,
+ MULTI_ARG_1_QI_DI,
+ MULTI_ARG_1_QI_SI,
+ MULTI_ARG_1_QI_HI,
+ MULTI_ARG_1_PH2PS,
+ MULTI_ARG_1_PS2PH
+};
+
+static const struct builtin_description bdesc_multi_arg[] =
+{
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+};
+
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
V16QI_type_node,
integer_type_node,
NULL_TREE);
+
+ /* SSE5 instructions */
+ tree v2di_ftype_v2di_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v2di
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v4si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v2df_ftype_v2df_v2df_v16qi
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node,
+ V2DF_type_node,
+ V16QI_type_node,
+ NULL_TREE);
+
+ tree v4sf_ftype_v4sf_v4sf_v16qi
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node,
+ V4SF_type_node,
+ V16QI_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di_si
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v16qi_ftype_v16qi_si
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4hi
+ = build_function_type_list (V4SF_type_node,
+ V4HI_type_node,
+ NULL_TREE);
+
+ tree v4hi_ftype_v4sf
+ = build_function_type_list (V4HI_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
+
tree ftype;
/* The __float80 type. */
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
+
+ /* SSE4.1 and SSE5 */
+ def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
+ def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
+ def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
+ def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
/* SSE4.2. */
ftype = build_function_type_list (unsigned_type_node,
intQI_type_node,
integer_type_node, NULL_TREE);
def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
+
+ /* Add SSE5 multi-arg argument instructions */
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ {
+ tree mtype = NULL_TREE;
+
+ if (d->name == 0)
+ continue;
+
+ switch ((enum multi_arg_type)d->flag)
+ {
+ case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
+ case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
+ case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
+ case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
+ case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
+ case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
+ case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
+ case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
+ case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
+ case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
+ case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
+ case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
+ case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
+ case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
+ case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
+ case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
+ case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
+ case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
+ case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
+ case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
+ case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
+ case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
+ case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
+ case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
+ case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
+ case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
+ case MULTI_ARG_UNKNOWN:
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mtype)
+ def_builtin_const (d->mask, d->name, mtype, d->code);
+ }
}
static void
return target;
}
+/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
+
+static rtx
+ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
+ enum multi_arg_type m_type,
+ enum insn_code sub_code)
+{
+ rtx pat;
+ int i;
+ int nargs;
+ bool comparison_p = false;
+ bool tf_p = false;
+ bool last_arg_constant = false;
+ int num_memory = 0;
+ struct {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+
+ switch (m_type)
+ {
+ case MULTI_ARG_3_SF:
+ case MULTI_ARG_3_DF:
+ case MULTI_ARG_3_DI:
+ case MULTI_ARG_3_SI:
+ case MULTI_ARG_3_SI_DI:
+ case MULTI_ARG_3_HI:
+ case MULTI_ARG_3_HI_SI:
+ case MULTI_ARG_3_QI:
+ case MULTI_ARG_3_PERMPS:
+ case MULTI_ARG_3_PERMPD:
+ nargs = 3;
+ break;
+
+ case MULTI_ARG_2_SF:
+ case MULTI_ARG_2_DF:
+ case MULTI_ARG_2_DI:
+ case MULTI_ARG_2_SI:
+ case MULTI_ARG_2_HI:
+ case MULTI_ARG_2_QI:
+ nargs = 2;
+ break;
+
+ case MULTI_ARG_2_DI_IMM:
+ case MULTI_ARG_2_SI_IMM:
+ case MULTI_ARG_2_HI_IMM:
+ case MULTI_ARG_2_QI_IMM:
+ nargs = 2;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_1_SF:
+ case MULTI_ARG_1_DF:
+ case MULTI_ARG_1_DI:
+ case MULTI_ARG_1_SI:
+ case MULTI_ARG_1_HI:
+ case MULTI_ARG_1_QI:
+ case MULTI_ARG_1_SI_DI:
+ case MULTI_ARG_1_HI_DI:
+ case MULTI_ARG_1_HI_SI:
+ case MULTI_ARG_1_QI_DI:
+ case MULTI_ARG_1_QI_SI:
+ case MULTI_ARG_1_QI_HI:
+ case MULTI_ARG_1_PH2PS:
+ case MULTI_ARG_1_PS2PH:
+ nargs = 1;
+ break;
+
+ case MULTI_ARG_2_SF_CMP:
+ case MULTI_ARG_2_DF_CMP:
+ case MULTI_ARG_2_DI_CMP:
+ case MULTI_ARG_2_SI_CMP:
+ case MULTI_ARG_2_HI_CMP:
+ case MULTI_ARG_2_QI_CMP:
+ nargs = 2;
+ comparison_p = true;
+ break;
+
+ case MULTI_ARG_2_SF_TF:
+ case MULTI_ARG_2_DF_TF:
+ case MULTI_ARG_2_DI_TF:
+ case MULTI_ARG_2_SI_TF:
+ case MULTI_ARG_2_HI_TF:
+ case MULTI_ARG_2_QI_TF:
+ nargs = 2;
+ tf_p = true;
+ break;
+
+ case MULTI_ARG_UNKNOWN:
+ default:
+ gcc_unreachable ();
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ gcc_assert (nargs <= 4);
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ int adjust = (comparison_p) ? 1 : 0;
+ enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
+
+ if (last_arg_constant && i == nargs-1)
+ {
+ if (GET_CODE (op) != CONST_INT)
+ {
+ error ("last argument must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ /* If we aren't optimizing, only allow one memory operand to be
+ generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
+
+ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
+
+ if (optimize
+ || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
+ || num_memory > 1)
+ op = force_reg (mode, op);
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+
+ case 2:
+ if (tf_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ GEN_INT ((int)sub_code));
+ else if (! comparison_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ else
+ {
+ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
+ args[0].op,
+ args[1].op);
+
+ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
+ }
+ break;
+
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
op0 = copy_to_reg (op0);
- op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
op1 = copy_to_reg (op1);
target = gen_reg_rtx (tmode);
if (d->code == fcode)
return ix86_expand_sse_pcmpistr (d, exp, target);
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_multi_arg_builtin (d->icode, exp, target,
+ (enum multi_arg_type)d->flag,
+ d->comparison);
+
gcc_unreachable ();
}
return gen_rtx_REG (DImode, parm_regs[aggr]);
}
- if (ix86_function_regparm (type, function) > 0
- && !type_has_variadic_args_p (type))
+ if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
{
int regno = 0;
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
emit_move_insn (operand0, res);
}
+\f
+/* Validate whether a SSE5 instruction is valid or not.
+ OPERANDS is the array of operands.
+ NUM is the number of operands.
+ USES_OC0 is true if the instruction uses OC0 and provides 4 varients.
+ NUM_MEMORY is the maximum number of memory operands to accept. */
+bool ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
+{
+ int mem_mask;
+ int mem_count;
+ int i;
+
+ /* Count the number of memory arguments */
+ mem_mask = 0;
+ mem_count = 0;
+ for (i = 0; i < num; i++)
+ {
+ enum machine_mode mode = GET_MODE (operands[i]);
+ if (register_operand (operands[i], mode))
+ ;
+
+ else if (memory_operand (operands[i], mode))
+ {
+ mem_mask |= (1 << i);
+ mem_count++;
+ }
+
+ else
+ {
+ rtx pattern = PATTERN (insn);
+
+ /* allow 0 for pcmov */
+ if (GET_CODE (pattern) != SET
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
+ || i < 2
+ || operands[i] != CONST0_RTX (mode))
+ return false;
+ }
+ }
+
+ /* If there were no memory operations, allow the insn */
+ if (mem_mask == 0)
+ return true;
+
+ /* Do not allow the destination register to be a memory operand. */
+ else if (mem_mask & (1 << 0))
+ return false;
+
+ /* If there are too many memory operations, disallow the instruction. While
+ the hardware only allows 1 memory reference, before register allocation
+ for some insns, we allow two memory operations sometimes in order to allow
+ code like the following to be optimized:
+
+ float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
+
+ or similar cases that are vectorized into using the fmaddss
+ instruction. */
+ else if (mem_count > num_memory)
+ return false;
+
+ /* Don't allow more than one memory operation if not optimizing. */
+ else if (mem_count > 1 && !optimize)
+ return false;
+
+ else if (num == 4 && mem_count == 1)
+ {
+ /* formats (destination is the first argument), example fmaddss:
+ xmm1, xmm1, xmm2, xmm3/mem
+ xmm1, xmm1, xmm2/mem, xmm3
+ xmm1, xmm2, xmm3/mem, xmm1
+ xmm1, xmm2/mem, xmm3, xmm1 */
+ if (uses_oc0)
+ return ((mem_mask == (1 << 1))
+ || (mem_mask == (1 << 2))
+ || (mem_mask == (1 << 3)));
+
+ /* format, example pmacsdd:
+ xmm1, xmm2, xmm3/mem, xmm1 */
+ else
+ return (mem_mask == (1 << 2));
+ }
+
+ else if (num == 4 && num_memory == 2)
+ {
+ /* If there are two memory operations, we can load one of the memory ops
+ into the destination register. This is for optimizating the
+ multiply/add ops, which the combiner has optimized both the multiply
+ and the add insns to have a memory operation. We have to be careful
+ that the destination doesn't overlap with the inputs. */
+ rtx op0 = operands[0];
+
+ if (reg_mentioned_p (op0, operands[1])
+ || reg_mentioned_p (op0, operands[2])
+ || reg_mentioned_p (op0, operands[3]))
+ return false;
+
+ /* formats (destination is the first argument), example fmaddss:
+ xmm1, xmm1, xmm2, xmm3/mem
+ xmm1, xmm1, xmm2/mem, xmm3
+ xmm1, xmm2, xmm3/mem, xmm1
+ xmm1, xmm2/mem, xmm3, xmm1
+
+ For the oc0 case, we will load either operands[1] or operands[3] into
+ operands[0], so any combination of 2 memory operands is ok. */
+ if (uses_oc0)
+ return true;
+
+ /* format, example pmacsdd:
+ xmm1, xmm2, xmm3/mem, xmm1
+
+ For the integer multiply/add instructions be more restrictive and
+ require operands[2] and operands[3] to be the memory operands. */
+ else
+ return (mem_mask == ((1 << 2) | (1 << 3)));
+ }
+
+ else if (num == 3 && num_memory == 1)
+ {
+ /* formats, example protb:
+ xmm1, xmm2, xmm3/mem
+ xmm1, xmm2/mem, xmm3 */
+ if (uses_oc0)
+ return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
+
+ /* format, example comeq:
+ xmm1, xmm2, xmm3/mem */
+ else
+ return (mem_mask == (1 << 2));
+ }
+
+ else
+ gcc_unreachable ();
+
+ return false;
+}
+
+\f
+/* Fixup an SSE5 instruction that has 2 memory input references into a form the
+ hardware will allow by using the destination register to load one of the
+ memory operations. Presently this is used by the multiply/add routines to
+ allow 2 memory references. */
+
+void
+ix86_expand_sse5_multiple_memory (rtx operands[],
+ int num,
+ enum machine_mode mode)
+{
+ rtx op0 = operands[0];
+ if (num != 4
+ || memory_operand (op0, mode)
+ || reg_mentioned_p (op0, operands[1])
+ || reg_mentioned_p (op0, operands[2])
+ || reg_mentioned_p (op0, operands[3]))
+ gcc_unreachable ();
+
+ /* For 2 memory operands, pick either operands[1] or operands[3] to move into
+ the destination register. */
+ if (memory_operand (operands[1], mode))
+ {
+ emit_move_insn (op0, operands[1]);
+ operands[1] = op0;
+ }
+ else if (memory_operand (operands[3], mode))
+ {
+ emit_move_insn (op0, operands[3]);
+ operands[3] = op0;
+ }
+ else
+ gcc_unreachable ();
+
+ return;
+}
+
\f
/* Table of valid machine attributes. */
static const struct attribute_spec ix86_attribute_table[] =
{ NULL, 0, 0, false, false, false, NULL }
};
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+x86_builtin_vectorization_cost (bool runtime_test)
+{
+ /* If the branch of the runtime test is taken - i.e. - the vectorized
+ version is skipped - this incurs a misprediction cost (because the
+ vectorized version is expected to be the fall-through). So we subtract
+ the latency of a mispredicted branch from the costs that are incured
+ when the vectorized version is executed.
+
+ TODO: The values in individual target tables have to be tuned or new
+ fields may be needed. For eg. on K8, the default branch path is the
+ not-taken path. If the taken path is predicted correctly, the minimum
+ penalty of going down the taken-path is 1 cycle. If the taken-path is
+ not predicted correctly, then the minimum penalty is 10 cycles. */
+
+ if (runtime_test)
+ {
+ return (-(ix86_cost->cond_taken_branch_cost));
+ }
+ else
+ return 0;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"