#include "insn-attr.h"
#include "flags.h"
#include "except.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "vec.h"
+#include "machmode.h"
+#include "input.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "reload.h"
#include "cgraph.h"
#include "hash-table.h"
-#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
/* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
on and OPTION_MASK_ABI_64 is off. We turn off
OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
- -m64. */
- if (TARGET_LP64_P (opts->x_ix86_isa_flags))
+ -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
+ if (TARGET_LP64_P (opts->x_ix86_isa_flags)
+ || TARGET_16BIT_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
}
}
}
+#ifndef NO_PROFILE_COUNTERS
+ if (flag_nop_mcount)
+ error ("-mnop-mcount is not compatible with this target");
+#endif
+ if (flag_nop_mcount && flag_pic)
+ error ("-mnop-mcount is not implemented for -fPIC");
+
/* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM_P (opts->x_target_flags)
&& ! TARGET_SSE_P (opts->x_ix86_isa_flags))
&& !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
- struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
+ cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
if (i && i->local && i->can_change_signature)
{
int local_regparm, globals = 0, regno;
&& !(profile_flag && !flag_fentry))
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
- struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
if (i && i->local && i->can_change_signature)
return TARGET_SSE2 ? 2 : 1;
}
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
static bool
-ix86_legitimate_combined_insn (rtx insn)
+ix86_legitimate_combined_insn (rtx_insn *insn)
{
/* Check operand constraints in case hard registers were propagated
into insn pattern. This check prevents combine pass from
/* Operand has no constraints, anything is OK. */
win = !n_alternatives;
- alternative_mask enabled = recog_data.enabled_alternatives;
+ alternative_mask preferred = get_preferred_alternatives (insn);
for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
{
- if (!TEST_BIT (enabled, j))
+ if (!TEST_BIT (preferred, j))
continue;
if (op_alt[i].anything_ok
|| (op_alt[i].matches != -1
reinit_regs ();
}
+/* Return true if a pseudo register should be created and used to hold
+   the GOT address for PIC code.  */
+static bool
+ix86_use_pseudo_pic_reg (void)
+{
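+  /* Non-PIC code needs no GOT pointer at all, and 64-bit small-PIC and
+     PE-COFF code reference the GOT (if at all) RIP-relatively, so only the
+     remaining PIC cases need a pseudo to hold the GOT address.  */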
+ if ((TARGET_64BIT
+ && (ix86_cmodel == CM_SMALL_PIC
+ || TARGET_PECOFF))
+ || !flag_pic)
+ return false;
+ return true;
+}
+
+/* Create and initialize PIC register if required. */
+static void
+ix86_init_pic_reg (void)
+{
+ edge entry_edge;
+ rtx_insn *seq;
+
+ if (!ix86_use_pseudo_pic_reg ())
+ return;
+
+ start_sequence ();
+
+ if (TARGET_64BIT)
+ {
+ if (ix86_cmodel == CM_LARGE_PIC)
+ {
+ rtx_code_label *label;
+ rtx tmp_reg;
+
+ gcc_assert (Pmode == DImode);
+ label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ tmp_reg = gen_rtx_REG (Pmode, R11_REG);
+ gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
+ emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
+ label));
+ emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
+ emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
+ pic_offset_table_rtx, tmp_reg));
+ }
+ else
+ emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ }
+ else
+ {
+ rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
+ }
+
+ seq = get_insns ();
+ end_sequence ();
+
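+  /* Emit the GOT setup on the edge from the entry block so the pseudo PIC
+     register is initialized exactly once, before any of its uses.  */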
+ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ insert_insn_on_edge (seq, entry_edge);
+ commit_one_edge_insertion (entry_edge);
+}
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
if (fndecl)
{
- i = cgraph_local_info (fndecl);
+ i = cgraph_node::local_info (fndecl);
cum->call_abi = ix86_function_abi (fndecl);
}
else
if (ix86_varargs_fpr_size)
{
enum machine_mode smode;
- rtx label, test;
+ rtx_code_label *label;
+ rtx test;
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function, though all we
scratch_regno = split_stack_prologue_scratch_regno ();
if (scratch_regno != INVALID_REGNUM)
{
- rtx reg, seq;
+ rtx reg;
+ rtx_insn *seq;
reg = gen_reg_rtx (Pmode);
cfun->machine->split_stack_varargs_pointer = reg;
the constant X. */
const char *
-standard_sse_constant_opcode (rtx insn, rtx x)
+standard_sse_constant_opcode (rtx_insn *insn, rtx x)
{
switch (standard_sse_constant_p (x))
{
static unsigned int
ix86_select_alt_pic_regnum (void)
{
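+  /* When the GOT address is held in a pseudo there is no hard PIC register
+     to substitute for.  */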
+ if (ix86_use_pseudo_pic_reg ())
+ return INVALID_REGNUM;
+
if (crtl->is_leaf
&& !crtl->profile
&& !ix86_current_function_calls_tls_descriptor)
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
if (pic_offset_table_rtx
+ && !ix86_use_pseudo_pic_reg ()
&& regno == REAL_PIC_OFFSET_TABLE_REGNUM
&& (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
|| crtl->profile
unsigned int regno = find_drap_reg ();
rtx drap_vreg;
rtx arg_ptr;
- rtx seq, insn;
+ rtx_insn *seq, *insn;
arg_ptr = gen_rtx_REG (Pmode, regno);
crtl->drap_reg = arg_ptr;
HARD_FRAME_POINTER_REGNUM);
FOR_EACH_BB_FN (bb, cfun)
{
- rtx insn;
+ rtx_insn *insn;
FOR_BB_INSNS (bb, insn)
if (NONDEBUG_INSN_P (insn)
&& requires_stack_frame_p (insn, prologue_used,
{
struct machine_function *m = cfun->machine;
rtx insn, t;
- bool pic_reg_used;
struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
if (!sse_registers_saved)
ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
- pic_reg_used = false;
- /* We don't use pic-register for pe-coff target. */
- if (pic_offset_table_rtx
- && !TARGET_PECOFF
- && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
- || crtl->profile))
- {
- unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
-
- if (alt_pic_reg_used != INVALID_REGNUM)
- SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
-
- pic_reg_used = true;
- }
-
- if (pic_reg_used)
- {
- if (TARGET_64BIT)
- {
- if (ix86_cmodel == CM_LARGE_PIC)
- {
- rtx label, tmp_reg;
-
- gcc_assert (Pmode == DImode);
- label = gen_label_rtx ();
- emit_label (label);
- LABEL_PRESERVE_P (label) = 1;
- tmp_reg = gen_rtx_REG (Pmode, R11_REG);
- gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
- insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
- label));
- insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
- insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
- pic_offset_table_rtx, tmp_reg));
- }
- else
- insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
- }
- else
- {
- insn = emit_insn (gen_set_got (pic_offset_table_rtx));
- RTX_FRAME_RELATED_P (insn) = 1;
- add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
- }
- }
-
- /* In the pic_reg_used case, make sure that the got load isn't deleted
- when mcount needs it. Blockage to avoid call movement across mcount
- call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
- note. */
- if (crtl->profile && !flag_fentry && pic_reg_used)
- emit_insn (gen_prologue_use (pic_offset_table_rtx));
-
if (crtl->drap_reg && !crtl->stack_realign_needed)
{
/* vDRAP is setup but after reload it turns out stack realign
static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
{
- if (pic_offset_table_rtx)
+ if (pic_offset_table_rtx
+ && !ix86_use_pseudo_pic_reg ())
SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
/* Mach-O doesn't support labels at the end of objects, so if
it looks like we might want one, insert a NOP. */
{
- rtx insn = get_last_insn ();
- rtx deleted_debug_label = NULL_RTX;
+ rtx_insn *insn = get_last_insn ();
+ rtx_insn *deleted_debug_label = NULL;
while (insn
&& NOTE_P (insn)
&& NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
struct ix86_frame frame;
HOST_WIDE_INT allocate;
unsigned HOST_WIDE_INT args_size;
- rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
+ rtx_code_label *label;
+ rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
rtx scratch_reg = NULL_RTX;
- rtx varargs_label = NULL_RTX;
+ rtx_code_label *varargs_label = NULL;
rtx fn;
gcc_assert (flag_split_stack && reload_completed);
REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
if (split_stack_fn == NULL_RTX)
- split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+ {
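+      /* Mark __morestack as locally bound so that the call does not go
+         through the PLT.  */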
+ split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+ SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
+ }
fn = split_stack_fn;
/* Get more stack space. We pass in the desired stack space and the
gcc_assert ((args_size & 0xffffffff) == args_size);
if (split_stack_fn_large == NULL_RTX)
- split_stack_fn_large =
- gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
-
+ {
+ split_stack_fn_large =
+ gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
+ SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
+ }
if (ix86_cmodel == CM_LARGE_PIC)
{
- rtx label, x;
+ rtx_code_label *label;
+ rtx x;
label = gen_label_rtx ();
emit_label (label);
|| REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
cost++;
+  /* When the address base or index is "pic_offset_table_rtx" we don't
+     increase the address cost.  When a memory operand using
+     "pic_offset_table_rtx" is not invariant itself, that most likely means
+     its base or index is not invariant either, so only "pic_offset_table_rtx"
+     could be hoisted out, which is not profitable on x86.  */
if (parts.base
+ && (!pic_offset_table_rtx
+ || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
&& (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
&& parts.index
+ && (!pic_offset_table_rtx
+ || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
&& (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
&& parts.base != parts.index)
cost++;
return set;
}
+/* Set regs_ever_live for PIC base address register
+ to true if required. */
+static void
+set_pic_reg_ever_live ()
+{
+ if (reload_in_progress)
+ df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
+}
+
/* Return a legitimate reference for ORIG (an address) using the
register REG. If REG is 0, a new pseudo is generated.
/* This symbol may be referenced via a displacement from the PIC
base address (@GOTOFF). */
- if (reload_in_progress)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ set_pic_reg_ever_live ();
if (GET_CODE (addr) == CONST)
addr = XEXP (addr, 0);
if (GET_CODE (addr) == PLUS)
/* This symbol may be referenced via a displacement from the PIC
base address (@GOTOFF). */
- if (reload_in_progress)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ set_pic_reg_ever_live ();
if (GET_CODE (addr) == CONST)
addr = XEXP (addr, 0);
if (GET_CODE (addr) == PLUS)
/* This symbol must be referenced via a load from the
Global Offset Table (@GOT). */
- if (reload_in_progress)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ set_pic_reg_ever_live ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
new_rtx = gen_rtx_CONST (Pmode, new_rtx);
if (TARGET_64BIT)
{
if (!TARGET_64BIT)
{
- if (reload_in_progress)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ set_pic_reg_ever_live ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
UNSPEC_GOTOFF);
new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
- rtx insns;
+ rtx_insn *insns;
start_sequence ();
emit_call_insn
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
- rtx insns, eqv;
+ rtx_insn *insns;
+ rtx eqv;
start_sequence ();
emit_call_insn
}
else if (flag_pic)
{
- if (reload_in_progress)
- df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ set_pic_reg_ever_live ();
pic = pic_offset_table_rtx;
type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
}
if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
return (pic_offset_table_rtx
&& rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+ else if (!REG_P (x))
+ return false;
+ else if (pic_offset_table_rtx)
+ {
+ if (REGNO (x) == REGNO (pic_offset_table_rtx))
+ return true;
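+      /* After reload the pseudo PIC register may appear as a hard register
+         whose ORIGINAL_REGNO still records the pseudo's number.  */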
+ if (HARD_REGISTER_P (x)
+ && !HARD_REGISTER_P (pic_offset_table_rtx)
+ && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
+ return true;
+ return false;
+ }
else
- return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
+ return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
leal (%ebx, %ecx, 4), %ecx
...
movl foo@GOTOFF(%ecx), %edx
- in which case we return (%ecx - %ebx) + foo. */
- if (pic_offset_table_rtx)
+     in which case we return (%ecx - %ebx) + foo,
+     or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if a pseudo PIC register
+     is used and reload has completed.  */
+ if (pic_offset_table_rtx
+ && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
pic_offset_table_rtx),
result);
+ else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
+ {
+ rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+ tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
+ result = gen_rtx_PLUS (Pmode, tmp, result);
+ }
else
return orig_x;
}
}
}
-/* Locate some local-dynamic symbol still in use by this function
- so that we can print its name in some tls_local_dynamic_base
- pattern. */
-
-static int
-get_some_local_dynamic_name_1 (rtx *px, void *)
-{
- rtx x = *px;
-
- if (GET_CODE (x) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
- {
- cfun->machine->some_ld_name = XSTR (x, 0);
- return 1;
- }
-
- return 0;
-}
-
-static const char *
-get_some_local_dynamic_name (void)
-{
- rtx insn;
-
- if (cfun->machine->some_ld_name)
- return cfun->machine->some_ld_name;
-
- for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (NONDEBUG_INSN_P (insn)
- && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
- return cfun->machine->some_ld_name;
-
- return NULL;
-}
-
/* Meaning of CODE:
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
C -- print opcode suffix for set/cmov insn.
/* Return needed mode for entity in optimize_mode_switching pass. */
static int
-ix86_avx_u128_mode_needed (rtx insn)
+ix86_avx_u128_mode_needed (rtx_insn *insn)
{
if (CALL_P (insn))
{
prior to the execution of insn. */
static int
-ix86_i387_mode_needed (int entity, rtx insn)
+ix86_i387_mode_needed (int entity, rtx_insn *insn)
{
enum attr_i387_cw mode;
prior to the execution of insn. */
static int
-ix86_mode_needed (int entity, rtx insn)
+ix86_mode_needed (int entity, rtx_insn *insn)
{
switch (entity)
{
/* Calculate mode of upper 128bit AVX registers after the insn. */
static int
-ix86_avx_u128_mode_after (int mode, rtx insn)
+ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
rtx pat = PATTERN (insn);
/* Return the mode that an insn results in. */
int
-ix86_mode_after (int entity, int mode, rtx insn)
+ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
switch (entity)
{
operand may be [SDX]Fmode. */
const char *
-output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
+output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
int dimode_p = GET_MODE (operands[0]) == DImode;
{
case V4SFmode:
case V8SFmode:
+ case V16SFmode:
case V2DFmode:
case V4DFmode:
+ case V8DFmode:
dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
if (GET_CODE (op2) == CONST_VECTOR)
{
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
bool signed_p)
{
- rtx end_label, qimode_label;
+ rtx_code_label *end_label, *qimode_label;
rtx insn, div, mod;
rtx scratch, tmp0, tmp1, tmp2;
rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
emit_label (end_label);
}
-/* Whether it is OK to emit CFI directives when emitting asm code. */
-
-bool
-ix86_emit_cfi ()
-{
- return dwarf2out_do_cfi_asm ();
-}
-
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
   go to next cycle if there is some dependency.  */
static unsigned int
-increase_distance (rtx prev, rtx next, unsigned int distance)
+increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
df_ref def, use;
static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
- rtx insn, int distance,
- rtx start, bool *found)
+ rtx_insn *insn, int distance,
+ rtx_insn *start, bool *found)
{
basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
- rtx prev = start;
- rtx next = NULL;
+ rtx_insn *prev = start;
+ rtx_insn *next = NULL;
*found = false;
static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
- rtx insn)
+ rtx_insn *insn)
{
basic_block bb = BLOCK_FOR_INSN (insn);
int distance = 0;
static int
distance_agu_use_in_bb (unsigned int regno,
- rtx insn, int distance, rtx start,
+ rtx_insn *insn, int distance, rtx_insn *start,
bool *found, bool *redefined)
{
basic_block bb = NULL;
- rtx next = start;
- rtx prev = NULL;
+ rtx_insn *next = start;
+ rtx_insn *prev = NULL;
*found = false;
*redefined = false;
a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
static int
-distance_agu_use (unsigned int regno0, rtx insn)
+distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
basic_block bb = BLOCK_FOR_INSN (insn);
int distance = 0;
SPLIT_COST cycles higher latency than lea latency. */
static bool
-ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
+ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
unsigned int regno2, int split_cost, bool has_scale)
{
int dist_define, dist_use;
false otherwise. */
static bool
-ix86_ok_to_clobber_flags (rtx insn)
+ix86_ok_to_clobber_flags (rtx_insn *insn)
{
basic_block bb = BLOCK_FOR_INSN (insn);
df_ref use;
move and add to avoid AGU stalls. */
bool
-ix86_avoid_lea_for_add (rtx insn, rtx operands[])
+ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
unsigned int regno0, regno1, regno2;
instruction. */
bool
-ix86_use_lea_for_mov (rtx insn, rtx operands[])
+ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
unsigned int regno0, regno1;
instructions to avoid AGU stalls. */
bool
-ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
+ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
unsigned int regno0, regno1, regno2;
int split_cost;
/* Return true if regno1 def is nearest to the insn. */
static bool
-find_nearest_reg_def (rtx insn, int regno1, int regno2)
+find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
{
- rtx prev = insn;
- rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
+ rtx_insn *prev = insn;
+ rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
if (insn == start)
return false;
at lea position. */
void
-ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
+ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
{
unsigned int regno0, regno1, regno2;
struct ix86_address parts;
used soon, LEA is better and otherwise ADD is better. */
bool
-ix86_lea_for_add_ok (rtx insn, rtx operands[])
+ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
unsigned int regno0 = true_regnum (operands[0]);
unsigned int regno1 = true_regnum (operands[1]);
case TImode:
/* Expand DImode branch into multiple compare+branch. */
{
- rtx lo[2], hi[2], label2;
+ rtx lo[2], hi[2];
+ rtx_code_label *label2;
enum rtx_code code1, code2, code3;
enum machine_mode submode;
if (SCALAR_FLOAT_MODE_P (mode))
{
- rtx compare_op, compare_seq;
+ rtx compare_op;
+ rtx_insn *compare_seq;
gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
ix86_expand_int_movcc (rtx operands[])
{
enum rtx_code code = GET_CODE (operands[1]), compare_code;
- rtx compare_seq, compare_op;
+ rtx_insn *compare_seq;
+ rtx compare_op;
enum machine_mode mode = GET_MODE (operands[0]);
bool sign_bit_compare_p = false;
rtx op0 = XEXP (operands[1], 0);
}
break;
+ case V64QImode:
+ gen = gen_avx512bw_blendmv64qi;
+ break;
+ case V32HImode:
+ gen = gen_avx512bw_blendmv32hi;
+ break;
case V16SImode:
gen = gen_avx512f_blendmv16si;
break;
}
break;
+ case V64QImode:
+ case V32HImode:
case V32QImode:
case V16HImode:
case V16QImode:
return true;
}
+/* AVX512F does support 64-byte integer vector operations,
+ thus the longest vector we are faced with is V64QImode. */
+#define MAX_VECT_LEN 64
+
+struct expand_vec_perm_d
+{
+ rtx target, op0, op1;
+ unsigned char perm[MAX_VECT_LEN];
+ enum machine_mode vmode;
+ unsigned char nelt;
+ bool one_operand_p;
+ bool testing_p;
+};
+
static bool
-ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
+ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
+ struct expand_vec_perm_d *d)
{
- enum machine_mode mode = GET_MODE (op0);
+  /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
+     expanders, so the arguments are either in D or in OP0, OP1, etc.  */
+ enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
+ enum machine_mode maskmode = mode;
+ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
+
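+  /* Pick the vpermi2 pattern for MODE, guarded by the ISA extension that
+     provides it; for floating-point modes the permutation mask uses the
+     corresponding integer vector mode.  */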
switch (mode)
{
+ case V8HImode:
+ if (TARGET_AVX512VL && TARGET_AVX512BW)
+ gen = gen_avx512vl_vpermi2varv8hi3;
+ break;
+ case V16HImode:
+ if (TARGET_AVX512VL && TARGET_AVX512BW)
+ gen = gen_avx512vl_vpermi2varv16hi3;
+ break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vpermi2varv32hi3;
+ break;
+ case V4SImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv4si3;
+ break;
+ case V8SImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv8si3;
+ break;
case V16SImode:
- emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
- force_reg (V16SImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vpermi2varv16si3;
+ break;
+ case V4SFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv4sf3;
+ maskmode = V4SImode;
+ }
+ break;
+ case V8SFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv8sf3;
+ maskmode = V8SImode;
+ }
+ break;
case V16SFmode:
- emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
- force_reg (V16SImode, mask),
- op1));
- return true;
+ if (TARGET_AVX512F)
+ {
+ gen = gen_avx512f_vpermi2varv16sf3;
+ maskmode = V16SImode;
+ }
+ break;
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv2di3;
+ break;
+ case V4DImode:
+ if (TARGET_AVX512VL)
+ gen = gen_avx512vl_vpermi2varv4di3;
+ break;
case V8DImode:
- emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
- force_reg (V8DImode, mask), op1));
- return true;
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vpermi2varv8di3;
+ break;
+ case V2DFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv2df3;
+ maskmode = V2DImode;
+ }
+ break;
+ case V4DFmode:
+ if (TARGET_AVX512VL)
+ {
+ gen = gen_avx512vl_vpermi2varv4df3;
+ maskmode = V4DImode;
+ }
+ break;
case V8DFmode:
- emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
- force_reg (V8DImode, mask), op1));
- return true;
+ if (TARGET_AVX512F)
+ {
+ gen = gen_avx512f_vpermi2varv8df3;
+ maskmode = V8DImode;
+ }
+ break;
default:
- return false;
+ break;
}
+
+ if (gen == NULL)
+ return false;
+
+  /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
+     expanders, so the arguments are either in D or in OP0, OP1, etc.  */
+ if (d)
+ {
+ rtx vec[64];
+ target = d->target;
+ op0 = d->op0;
+ op1 = d->op1;
+ for (int i = 0; i < d->nelt; ++i)
+ vec[i] = GEN_INT (d->perm[i]);
+ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
+ }
+
+ emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
+ return true;
}
/* Expand a variable vector permutation. */
e = GET_MODE_UNIT_SIZE (mode);
gcc_assert (w <= 64);
- if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
+ if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
return;
if (TARGET_AVX2)
switch (imode)
{
+ case V64QImode:
+ if (unsigned_p)
+ unpack = gen_avx512bw_zero_extendv32qiv32hi2;
+ else
+ unpack = gen_avx512bw_sign_extendv32qiv32hi2;
+ halfmode = V32QImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
+ break;
case V32QImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv16qiv16hi2;
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If true, jump to the label.  */
-static rtx
+static rtx_code_label *
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
- rtx label = gen_label_rtx ();
+ rtx_code_label *label = gen_label_rtx ();
rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
if (GET_MODE (variable) == DImode)
emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
rtx count, enum machine_mode mode, int unroll,
int expected_size, bool issetmem)
{
- rtx out_label, top_label, iter, tmp;
+ rtx_code_label *out_label, *top_label;
+ rtx iter, tmp;
enum machine_mode iter_mode = counter_mode (count);
int piece_size_n = GET_MODE_SIZE (mode) * unroll;
rtx piece_size = GEN_INT (piece_size_n);
{
if (max_size > 4)
{
- rtx label = ix86_expand_aligntest (count, 4, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
src = change_address (srcmem, SImode, srcptr);
dest = change_address (destmem, SImode, destptr);
emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
if (max_size > 2)
{
- rtx label = ix86_expand_aligntest (count, 2, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
src = change_address (srcmem, HImode, srcptr);
dest = change_address (destmem, HImode, destptr);
emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
if (max_size > 1)
{
- rtx label = ix86_expand_aligntest (count, 1, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
src = change_address (srcmem, QImode, srcptr);
dest = change_address (destmem, QImode, destptr);
emit_insn (gen_strmov (destptr, dest, srcptr, src));
if (max_size > 4)
{
- rtx label = ix86_expand_aligntest (count, 4, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
src = change_address (srcmem, SImode, srcptr);
dest = change_address (destmem, SImode, destptr);
emit_move_insn (dest, src);
}
if (max_size > 2)
{
- rtx label = ix86_expand_aligntest (count, 2, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
src = change_address (srcmem, HImode, tmp);
tmp = gen_rtx_PLUS (Pmode, destptr, offset);
}
if (max_size > 1)
{
- rtx label = ix86_expand_aligntest (count, 1, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
src = change_address (srcmem, QImode, tmp);
tmp = gen_rtx_PLUS (Pmode, destptr, offset);
}
if (max_size > 16)
{
- rtx label = ix86_expand_aligntest (count, 16, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
if (TARGET_64BIT)
{
dest = change_address (destmem, DImode, destptr);
}
if (max_size > 8)
{
- rtx label = ix86_expand_aligntest (count, 8, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
if (TARGET_64BIT)
{
dest = change_address (destmem, DImode, destptr);
}
if (max_size > 4)
{
- rtx label = ix86_expand_aligntest (count, 4, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
dest = change_address (destmem, SImode, destptr);
emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
emit_label (label);
}
if (max_size > 2)
{
- rtx label = ix86_expand_aligntest (count, 2, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
dest = change_address (destmem, HImode, destptr);
emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
emit_label (label);
}
if (max_size > 1)
{
- rtx label = ix86_expand_aligntest (count, 1, true);
+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
dest = change_address (destmem, QImode, destptr);
emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
emit_label (label);
{
if (align <= i)
{
- rtx label = ix86_expand_aligntest (destptr, i, false);
+ rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
if (issetmem)
{
if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
rtx count, int size,
rtx done_label, bool issetmem)
{
- rtx label = ix86_expand_aligntest (count, size, false);
+ rtx_code_label *label = ix86_expand_aligntest (count, size, false);
enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
rtx modesize;
int n;
enum machine_mode mode,
rtx value, rtx vec_value,
rtx *count,
- rtx *done_label,
+ rtx_code_label **done_label,
int size,
int desired_align,
int align,
bool dynamic_check,
bool issetmem)
{
- rtx loop_label = NULL, label;
+ rtx_code_label *loop_label = NULL, *label;
int n;
rtx modesize;
int prolog_size = 0;
{
rtx destreg;
rtx srcreg = NULL;
- rtx label = NULL;
+ rtx_code_label *label = NULL;
rtx tmp;
- rtx jump_around_label = NULL;
+ rtx_code_label *jump_around_label = NULL;
HOST_WIDE_INT align = 1;
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
}
else
{
- rtx hot_label = gen_label_rtx ();
+ rtx_code_label *hot_label = gen_label_rtx ();
if (jump_around_label == NULL_RTX)
jump_around_label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
{
int align;
rtx tmp;
- rtx align_2_label = NULL_RTX;
- rtx align_3_label = NULL_RTX;
- rtx align_4_label = gen_label_rtx ();
- rtx end_0_label = gen_label_rtx ();
+ rtx_code_label *align_2_label = NULL;
+ rtx_code_label *align_3_label = NULL;
+ rtx_code_label *align_4_label = gen_label_rtx ();
+ rtx_code_label *end_0_label = gen_label_rtx ();
rtx mem;
rtx tmpreg = gen_reg_rtx (SImode);
rtx scratch = gen_reg_rtx (SImode);
}
else
{
- rtx end_2_label = gen_label_rtx ();
+ rtx_code_label *end_2_label = gen_label_rtx ();
/* Is zero in the first two bytes? */
emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
rtx callarg2,
rtx pop, bool sibcall)
{
- unsigned int const cregs_size
- = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
- rtx vec[3 + cregs_size];
+ rtx vec[3];
rtx use = NULL, call;
unsigned int vec_len = 0;
&& DEFAULT_ABI != MS_ABI))
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
&& ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
- use_reg (&use, pic_offset_table_rtx);
+ {
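+      /* Calls through the PLT need the GOT address in the hard PIC
+         register; when the GOT address lives in a pseudo, copy it into the
+         hard register before the call.  */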
+ use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
+ if (ix86_use_pseudo_pic_reg ())
+ emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
+ pic_offset_table_rtx);
+ }
}
if (TARGET_64BIT && INTVAL (callarg2) >= 0)
if (TARGET_64BIT_MS_ABI
&& (!callarg2 || INTVAL (callarg2) != -2))
{
- unsigned i;
-
- vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
- UNSPEC_MS_TO_SYSV_CALL);
+ int const cregs_size
+ = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
+ int i;
for (i = 0; i < cregs_size; i++)
{
int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
- vec[vec_len++]
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
+ clobber_reg (&use, gen_rtx_REG (mode, regno));
}
}
/* Output the assembly for a call instruction. */
const char *
-ix86_output_call_insn (rtx insn, rtx call_op)
+ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
bool direct_p = constant_call_address_operand (call_op, VOIDmode);
bool seh_nop_p = false;
circumstances. Determine if we have one of those. */
if (TARGET_SEH)
{
- rtx i;
+ rtx_insn *i;
for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
{
/* Compute default value for "length_immediate" attribute. When SHORTFORM
is set, expect that insn have 8bit immediate alternative. */
int
-ix86_attr_length_immediate_default (rtx insn, bool shortform)
+ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
int len = 0;
int i;
/* Compute default value for "length_address" attribute. */
int
-ix86_attr_length_address_default (rtx insn)
+ix86_attr_length_address_default (rtx_insn *insn)
{
int i;
for (i = recog_data.n_operands - 1; i >= 0; --i)
if (MEM_P (recog_data.operand[i]))
{
- constrain_operands_cached (reload_completed);
+ constrain_operands_cached (insn, reload_completed);
if (which_alternative != -1)
{
const char *constraints = recog_data.constraints[i];
2 or 3 byte VEX prefix and 1 opcode byte. */
int
-ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
+ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
+ bool has_vex_w)
{
int i;
by DEP_INSN and nothing set by DEP_INSN. */
static bool
-ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
{
rtx set, set2;
SET_INSN. */
bool
-ix86_agi_dependent (rtx set_insn, rtx use_insn)
+ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
{
int i;
extract_insn_cached (use_insn);
/* Return true if there exists exact dependency for store & load, i.e.
the same memory address is used in them. */
static bool
-exact_store_load_dependency (rtx store, rtx load)
+exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
{
rtx set1, set2;
}
static int
-ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
enum attr_type insn_type, dep_insn_type;
enum attr_memory memory;
"Intel Architectures Optimization Reference Manual". */
static bool
-ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
rtx src, dest;
- rtx single_set = single_set (condgen);
enum rtx_code ccode;
rtx compare_set = NULL_RTX, test_if, cond;
rtx alu_set = NULL_RTX, addr = NULL_RTX;
&& get_attr_type (condgen) != TYPE_ALU)
return false;
- if (single_set == NULL_RTX
+ compare_set = single_set (condgen);
+ if (compare_set == NULL_RTX
&& !TARGET_FUSE_ALU_AND_BRANCH)
return false;
- if (single_set != NULL_RTX)
- compare_set = single_set;
- else
+ if (compare_set == NULL_RTX)
{
int i;
rtx pat = PATTERN (condgen);
ready list.
Return index of IMUL producer if it was found and -1 otherwise. */
static int
-do_reorder_for_imul (rtx *ready, int n_ready)
+do_reorder_for_imul (rtx_insn **ready, int n_ready)
{
- rtx insn, set, insn1, insn2;
+ rtx_insn *insn;
+ rtx set, insn1, insn2;
sd_iterator_def sd_it;
dep_t dep;
int index = -1;
scheduled earlier. Applied for Silvermont only.
Return true if top 2 insns must be interchanged. */
static bool
-swap_top_of_ready_list (rtx *ready, int n_ready)
+swap_top_of_ready_list (rtx_insn **ready, int n_ready)
{
- rtx top = ready[n_ready - 1];
- rtx next = ready[n_ready - 2];
+ rtx_insn *top = ready[n_ready - 1];
+ rtx_insn *next = ready[n_ready - 2];
rtx set;
sd_iterator_def sd_it;
dep_t dep;
/* Perform possible reordering of ready list for Atom/Silvermont only.
Return issue rate. */
static int
-ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
- int clock_var)
+ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
+ int *pn_ready, int clock_var)
{
int issue_rate = -1;
int n_ready = *pn_ready;
int i;
- rtx insn;
+ rtx_insn *insn;
int index = -1;
/* Set up issue rate. */
/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely spilled HW register.  Return the first
   argument if at least one dependence was added or NULL otherwise.  */
-static rtx
-add_parameter_dependencies (rtx call, rtx head)
+static rtx_insn *
+add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
- rtx insn;
- rtx last = call;
- rtx first_arg = NULL;
+ rtx_insn *insn;
+ rtx_insn *last = call;
+ rtx_insn *first_arg = NULL;
bool is_spilled = false;
head = PREV_INSN (head);
/* Add output or anti dependency from insn to first_arg to restrict its code
motion. */
static void
-avoid_func_arg_motion (rtx first_arg, rtx insn)
+avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
rtx set;
rtx tmp;
/* Avoid cross block motion of function argument through adding dependency
from the first non-jump instruction in bb. */
static void
-add_dependee_for_func_arg (rtx arg, basic_block bb)
+add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
- rtx insn = BB_END (bb);
+ rtx_insn *insn = BB_END (bb);
while (insn)
{
/* Hook for pre-reload schedule - avoid motion of function arguments
passed in likely spilled HW registers. */
static void
-ix86_dependencies_evaluation_hook (rtx head, rtx tail)
+ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
- rtx insn;
- rtx first_arg = NULL;
+ rtx_insn *insn;
+ rtx_insn *first_arg = NULL;
if (reload_completed)
return;
while (head != tail && DEBUG_INSN_P (head))
moves from function argument registers at the top of the function entry
and moves from function return value registers after call. */
static int
-ix86_adjust_priority (rtx insn, int priority)
+ix86_adjust_priority (rtx_insn *insn, int priority)
{
rtx set;
data->ifetch_block_n_insns = 0;
}
-static int min_insn_size (rtx);
+static int min_insn_size (rtx_insn *);
/* Filter out insns from ready_try that the core will not be able to issue
on current cycle due to decoder. */
{
while (n_ready--)
{
- rtx insn;
+ rtx_insn *insn;
int insn_size;
if (ready_try[n_ready])
static void
core2i7_first_cycle_multipass_issue (void *_data,
signed char *ready_try, int n_ready,
- rtx insn, const void *_prev_data)
+ rtx_insn *insn, const void *_prev_data)
{
ix86_first_cycle_multipass_data_t data
= (ix86_first_cycle_multipass_data_t) _data;
IX86_BUILTIN_ADDCARRYX32,
IX86_BUILTIN_ADDCARRYX64,
+ /* SBB instructions. */
+ IX86_BUILTIN_SBB32,
+ IX86_BUILTIN_SBB64,
+
/* FSGSBASE instructions. */
IX86_BUILTIN_RDFSBASE32,
IX86_BUILTIN_RDFSBASE64,
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
IX86_BUILTIN_ADDCARRYX64);
+ /* SBB */
+ def_builtin (0, "__builtin_ia32_sbb_u32",
+ UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
+ def_builtin (OPTION_MASK_ISA_64BIT,
+ "__builtin_ia32_sbb_u64",
+ UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
+ IX86_BUILTIN_SBB64);
+
/* Read/write FLAGS. */
def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
*empty_bb = init_lowered_empty_function (decl, false);
cgraph_node::add_new_function (decl, true);
- cgraph_node::get_create (decl)->call_function_insertion_hooks ();
+ symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
pop_cfun ();
}
dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
- rebuild_cgraph_edges ();
+ cgraph_edge::rebuild_edges ();
pop_cfun ();
return resolver_decl;
}
"__cpu_model");
- varpool_add_new_variable (__cpu_model_var);
+ varpool_node::add (__cpu_model_var);
gcc_assert ((args != NULL) && (*args != NULL));
if (!match)
switch (icode)
{
- case CODE_FOR_avx2_inserti128:
- case CODE_FOR_avx2_extracti128:
+ case CODE_FOR_avx_vinsertf128v4di:
+ case CODE_FOR_avx_vextractf128v4di:
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_cmpv16si3_mask:
case CODE_FOR_avx512f_ucmpv8di3_mask:
case CODE_FOR_avx512f_ucmpv16si3_mask:
+ case CODE_FOR_avx512vl_cmpv4di3_mask:
+ case CODE_FOR_avx512vl_cmpv8si3_mask:
+ case CODE_FOR_avx512vl_ucmpv4di3_mask:
+ case CODE_FOR_avx512vl_ucmpv8si3_mask:
+ case CODE_FOR_avx512vl_cmpv2di3_mask:
+ case CODE_FOR_avx512vl_cmpv4si3_mask:
+ case CODE_FOR_avx512vl_ucmpv2di3_mask:
+ case CODE_FOR_avx512vl_ucmpv4si3_mask:
error ("the last argument must be a 3-bit immediate");
return const0_rtx;
case CODE_FOR_avx_vpermilv4df:
case CODE_FOR_avx512f_getmantv8df_mask:
case CODE_FOR_avx512f_getmantv16sf_mask:
+ case CODE_FOR_avx512vl_getmantv8sf_mask:
+ case CODE_FOR_avx512vl_getmantv4df_mask:
+ case CODE_FOR_avx512vl_getmantv4sf_mask:
+ case CODE_FOR_avx512vl_getmantv2df_mask:
+ case CODE_FOR_avx512dq_rangepv8df_mask_round:
+ case CODE_FOR_avx512dq_rangepv16sf_mask_round:
+ case CODE_FOR_avx512dq_rangepv4df_mask:
+ case CODE_FOR_avx512dq_rangepv8sf_mask:
+ case CODE_FOR_avx512dq_rangepv2df_mask:
+ case CODE_FOR_avx512dq_rangepv4sf_mask:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_vinserti32x4_mask:
case CODE_FOR_avx512f_vextractf32x4_mask:
case CODE_FOR_avx512f_vextracti32x4_mask:
+ case CODE_FOR_sse2_shufpd:
+ case CODE_FOR_sse2_shufpd_mask:
+ case CODE_FOR_avx512dq_shuf_f64x2_mask:
+ case CODE_FOR_avx512dq_shuf_i64x2_mask:
+ case CODE_FOR_avx512vl_shuf_i32x4_mask:
+ case CODE_FOR_avx512vl_shuf_f32x4_mask:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_vinserti64x4_mask:
case CODE_FOR_avx512f_vextractf64x4_mask:
case CODE_FOR_avx512f_vextracti64x4_mask:
+ case CODE_FOR_avx512dq_vinsertf32x8_mask:
+ case CODE_FOR_avx512dq_vinserti32x8_mask:
+ case CODE_FOR_avx512vl_vinsertv4df:
+ case CODE_FOR_avx512vl_vinsertv4di:
+ case CODE_FOR_avx512vl_vinsertv8sf:
+ case CODE_FOR_avx512vl_vinsertv8si:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
{
case CODE_FOR_avx512f_getmantv8df_mask_round:
case CODE_FOR_avx512f_getmantv16sf_mask_round:
- case CODE_FOR_avx512f_getmantv2df_round:
- case CODE_FOR_avx512f_getmantv4sf_round:
+ case CODE_FOR_avx512f_vgetmantv2df_round:
+ case CODE_FOR_avx512f_vgetmantv4sf_round:
error ("the immediate argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_cmpv8df3_mask_round:
emit_insn (gen_zero_extendqisi2 (target, op2));
return target;
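+    /* The SBB builtins share the carry-chain expansion path of the
+       ADDCARRYX builtins below, using the subtract-with-borrow patterns.  */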
+ case IX86_BUILTIN_SBB32:
+ icode = CODE_FOR_subsi3_carry;
+ mode0 = SImode;
+ goto addcarryx;
+
+ case IX86_BUILTIN_SBB64:
+ icode = CODE_FOR_subdi3_carry;
+ mode0 = DImode;
+ goto addcarryx;
+
case IX86_BUILTIN_ADDCARRYX32:
icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
mode0 = SImode;
the vec_dupv4hi pattern. */
if (GET_MODE_SIZE (from) < 4)
return true;
-
- /* Vector registers do not support subreg with nonzero offsets, which
- are otherwise valid for integer registers. Since we can't see
- whether we have a nonzero offset from here, prohibit all
- nonparadoxical subregs changing size. */
- if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
- return true;
}
return false;
rtx this_param = x86_this_parameter (function);
rtx this_reg, tmp, fnaddr;
unsigned int tmp_regno;
+ rtx_insn *insn;
if (TARGET_64BIT)
tmp_regno = R10_REG;
{
if (sibcall_insn_operand (fnaddr, word_mode))
{
- tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
- tmp = emit_call_insn (tmp);
- SIBLING_CALL_P (tmp) = 1;
+ fnaddr = XEXP (DECL_RTL (function), 0);
+ tmp = gen_rtx_MEM (QImode, fnaddr);
+ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
+ tmp = emit_call_insn (tmp);
+ SIBLING_CALL_P (tmp) = 1;
}
else
emit_jump_insn (gen_indirect_jump (fnaddr));
/* Emit just enough of rest_of_compilation to get the insns emitted.
Note that use_thunk calls assemble_start_function et al. */
- tmp = get_insns ();
- shorten_branches (tmp);
- final_start_function (tmp, file, 1);
- final (tmp, file, 1);
+ insn = get_insns ();
+ shorten_branches (insn);
+ final_start_function (insn, file, 1);
+ final (insn, file, 1);
final_end_function ();
}
return computed;
}
+/* Output to FILE a call to TARGET, or a 5-byte NOP in its place when
+   -mnop-mcount is in effect.  */
+
+static void
+x86_print_call_or_nop (FILE *file, const char *target)
+{
+ if (flag_nop_mcount)
+ fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
+ else
+ fprintf (file, "1:\tcall\t%s\n", target);
+}
+
/* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry. */
void
{
const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
: MCOUNT_NAME);
-
if (TARGET_64BIT)
{
#ifndef NO_PROFILE_COUNTERS
#endif
if (!TARGET_PECOFF && flag_pic)
- fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
+ fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
else
- fprintf (file, "\tcall\t%s\n", mcount_name);
+ x86_print_call_or_nop (file, mcount_name);
}
else if (flag_pic)
{
fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
+ fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
}
else
{
fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- fprintf (file, "\tcall\t%s\n", mcount_name);
+ x86_print_call_or_nop (file, mcount_name);
+ }
+
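+  /* With -mrecord-mcount, record the address of the mcount call site (the
+     "1:" label emitted above) in the __mcount_loc section.  */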
+ if (flag_record_mcount)
+ {
+ fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
+ fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
+ fprintf (file, "\t.previous\n");
}
}
99% of cases. */
static int
-min_insn_size (rtx insn)
+min_insn_size (rtx_insn *insn)
{
int l = 0, len;
static void
ix86_avoid_jump_mispredicts (void)
{
- rtx insn, start = get_insns ();
+ rtx_insn *insn, *start = get_insns ();
int nbytes = 0, njumps = 0;
int isjump = 0;
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
{
basic_block bb = e->src;
- rtx ret = BB_END (bb);
- rtx prev;
+ rtx_insn *ret = BB_END (bb);
+ rtx_insn *prev;
bool replace = false;
if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
static int
ix86_count_insn_bb (basic_block bb)
{
- rtx insn;
+ rtx_insn *insn;
int insn_count = 0;
/* Count number of instructions in this block. Return 4 if the number
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
{
- rtx ret = BB_END (e->src);
+ rtx_insn *ret = BB_END (e->src);
if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
{
int insn_count = ix86_count_insn (e->src);
/* Pad short function. */
if (insn_count < 4)
{
- rtx insn = ret;
+ rtx_insn *insn = ret;
/* Find epilogue. */
while (insn
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
{
- rtx insn, next;
+ rtx_insn *insn, *next;
/* Find the beginning of the epilogue. */
for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
bool
-x86_extended_QIreg_mentioned_p (rtx insn)
+x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
int i;
extract_insn_cached (insn);
void
x86_emit_floatuns (rtx operands[2])
{
- rtx neglab, donelab, i0, i1, f0, in, out;
+ rtx_code_label *neglab, *donelab;
+ rtx i0, i1, f0, in, out;
enum machine_mode mode, inmode;
inmode = GET_MODE (operands[1]);
emit_label (donelab);
}
\f
-/* AVX512F does support 64-byte integer vector operations,
- thus the longest vector we are faced with is V64QImode. */
-#define MAX_VECT_LEN 64
-
-struct expand_vec_perm_d
-{
- rtx target, op0, op1;
- unsigned char perm[MAX_VECT_LEN];
- enum machine_mode vmode;
- unsigned char nelt;
- bool one_operand_p;
- bool testing_p;
-};
-
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
{
bool ok;
- rtx insn, dup;
+ rtx_insn *insn;
+ rtx dup;
/* First attempt to recognize VAL as-is. */
dup = gen_rtx_VEC_DUPLICATE (mode, val);
insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
if (recog_memoized (insn) < 0)
{
- rtx seq;
+ rtx_insn *seq;
/* If that fails, force VAL into a register. */
start_sequence ();
goto widen;
case V8HImode:
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
{
struct expand_vec_perm_d dperm;
goto widen;
case V16QImode:
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
goto permute;
goto widen;
case V16HImode:
case V32QImode:
- {
- enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
- rtx x = gen_reg_rtx (hvmode);
+ if (TARGET_AVX2)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ rtx x = gen_reg_rtx (hvmode);
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- }
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
+ return true;
+
+ case V64QImode:
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
+ rtx x = gen_reg_rtx (hvmode);
+
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
+
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
return true;
default:
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[64], op0, op1;
+ rtx ops[64], op0, op1, op2, op3, op4, op5;
enum machine_mode half_mode = VOIDmode;
+ enum machine_mode quarter_mode = VOIDmode;
int n, i;
switch (mode)
gen_rtx_VEC_CONCAT (mode, op0, op1)));
return;
+ case V64QImode:
+ quarter_mode = V16QImode;
+ half_mode = V32QImode;
+ goto quarter;
+
+ case V32HImode:
+ quarter_mode = V8HImode;
+ half_mode = V16HImode;
+ goto quarter;
+
+quarter:
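+      /* Build the 512-bit vector from four 128-bit quarters: interleave the
+         scalar elements into each quarter, concatenate the quarters pairwise
+         into two 256-bit halves, then concatenate the halves.  */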
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (quarter_mode);
+ op1 = gen_reg_rtx (quarter_mode);
+ op2 = gen_reg_rtx (quarter_mode);
+ op3 = gen_reg_rtx (quarter_mode);
+ op4 = gen_reg_rtx (half_mode);
+ op5 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
+ n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op1,
+ &ops [n >> 2], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op2,
+ &ops [n >> 1], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op3,
+ &ops [(n >> 1) | (n >> 2)], n >> 3);
+ emit_insn (gen_rtx_SET (VOIDmode, op4,
+ gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op5,
+ gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op4, op5)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
emit_insn (gen_insert[j][i] (target, target, tmp));
return;
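+      /* For the 512-bit modes below, broadcast VAL into a temporary and use
+         a masked blend to replace element ELT of TARGET with it.  */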
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+ case V32HImode:
+ if (TARGET_AVX512F && TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
+ force_reg (SImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+ case V64QImode:
+ if (TARGET_AVX512F && TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
+ force_reg (DImode, GEN_INT (1 << elt))));
+ return;
+ }
+ else
+ break;
+
default:
break;
}
}
break;
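+ /* For 512-bit vectors, first extract the 256-bit half that contains
+ the element, then recurse with the index reduced into that half. */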
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V16HImode);
+ if (elt < 16)
+ emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+ }
+ break;
+
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V32QImode);
+ if (elt < 32)
+ emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 31);
+ return;
+ }
+ break;
+
case V16SFmode:
tmp = gen_reg_rtx (V8SFmode);
if (elt < 8)
GEN_INT (i / 2));
}
break;
+ case V64QImode:
+ case V32HImode:
case V16SImode:
case V16SFmode:
case V8DImode:
return false;
}
+/* Implement target hook libgcc_floating_mode_supported_p. */
+static bool
+ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ return true;
+
+ case TFmode:
+#ifdef IX86_NO_LIBGCC_TFMODE
+ return false;
+#elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
+ return TARGET_LONG_DOUBLE_128;
+#else
+ return true;
+#endif
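+ /* IX86_NO_LIBGCC_TFMODE and IX86_MAYBE_NO_LIBGCC_TFMODE are expected
+ to come from OS-specific target headers (an assumption here); the
+ latter ties TFmode libgcc support to TARGET_LONG_DOUBLE_128,
+ i.e. -mlong-double-128. */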
+
+ default:
+ return false;
+ }
+}
+
/* Target hook for c_mode_for_suffix. */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
- rtx label1 = gen_label_rtx ();
- rtx label2 = gen_label_rtx ();
+ rtx_code_label *label1 = gen_label_rtx ();
+ rtx_code_label *label2 = gen_label_rtx ();
rtx tmp = gen_reg_rtx (XFmode);
rtx tmp2 = gen_reg_rtx (XFmode);
rtx e1, e2, res, tmp, tmp1, half;
rtx scratch = gen_reg_rtx (HImode);
rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
- rtx jump_label = gen_label_rtx ();
+ rtx_code_label *jump_label = gen_label_rtx ();
rtx insn;
rtx (*gen_abs) (rtx, rtx);
rtx (*gen_neg) (rtx, rtx);
swapping the operands if SWAP_OPERANDS is true. The expanded
code is a forward jump to a newly created label in case the
comparison is true. The generated label rtx is returned. */
-static rtx
+static rtx_code_label *
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
bool swap_operands)
{
enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
- rtx label, tmp;
+ rtx_code_label *label;
+ rtx tmp;
if (swap_operands)
{
*/
enum machine_mode fmode = GET_MODE (op1);
enum machine_mode imode = GET_MODE (op0);
- rtx ireg, freg, label, tmp;
+ rtx ireg, freg, tmp;
+ rtx_code_label *label;
/* reg = (long)op1 */
ireg = gen_reg_rtx (imode);
return copysign (xa, operand1);
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx res, xa, label, TWO52, mask;
+ rtx res, xa, TWO52, mask;
+ rtx_code_label *label;
res = gen_reg_rtx (mode);
emit_move_insn (res, operand1);
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx xa, TWO52, tmp, label, one, res, mask;
+ rtx xa, TWO52, tmp, one, res, mask;
+ rtx_code_label *label;
TWO52 = ix86_gen_TWO52 (mode);
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx xa, xi, TWO52, tmp, label, one, res, mask;
+ rtx xa, xi, TWO52, tmp, one, res, mask;
+ rtx_code_label *label;
TWO52 = ix86_gen_TWO52 (mode);
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
+ rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
+ rtx_code_label *label;
TWO52 = ix86_gen_TWO52 (mode);
return x2;
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx xa, xi, TWO52, label, res, mask;
+ rtx xa, xi, TWO52, res, mask;
+ rtx_code_label *label;
TWO52 = ix86_gen_TWO52 (mode);
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
enum machine_mode mode = GET_MODE (operand0);
- rtx xa, mask, TWO52, label, one, res, smask, tmp;
+ rtx xa, mask, TWO52, one, res, smask, tmp;
+ rtx_code_label *label;
/* C code for SSE variant we expand below.
double xa = fabs (x), x2;
return copysign (xa, x);
*/
enum machine_mode mode = GET_MODE (operand0);
- rtx res, TWO52, xa, label, xi, half, mask;
+ rtx res, TWO52, xa, xi, half, mask;
+ rtx_code_label *label;
const struct real_format *fmt;
REAL_VALUE_TYPE pred_half, half_minus_pred_half;
insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
insn every time. */
-static GTY(()) rtx vselect_insn;
+static GTY(()) rtx_insn *vselect_insn;
/* Initialize vselect_insn. */
if (d->one_operand_p)
return false;
- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+ if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
+ && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+ ;
+ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
switch (vmode)
{
+ case V8DFmode:
+ case V16SFmode:
case V4DFmode:
case V8SFmode:
case V2DFmode:
case V4SFmode:
case V8HImode:
case V8SImode:
+ case V32HImode:
+ case V64QImode:
+ case V16SImode:
+ case V8DImode:
for (i = 0; i < nelt; ++i)
mask |= (d->perm[i] >= nelt) << i;
break;
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
unsigned i, nelt, eltsz, mask;
- unsigned char perm[32];
+ unsigned char perm[64];
enum machine_mode vmode = V16QImode;
- rtx rperm[32], vperm, target, op0, op1;
+ rtx rperm[64], vperm, target, op0, op1;
nelt = d->nelt;
op0 = gen_lowpart (V4DImode, d->op0);
op1 = gen_lowpart (V4DImode, d->op1);
rperm[0]
- = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
- || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
+ = GEN_INT ((d->perm[0] / (nelt / 2))
+ | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
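+ /* Worked example of the immediate: if the low result lane should come
+ from 128-bit lane 1 and the high result lane from lane 3 (lanes
+ numbered 0..3 across the op0:op1 concatenation), then
+ perm[0] / (nelt / 2) == 1 and perm[nelt / 2] / (nelt / 2) == 3,
+ giving the vperm2i128 immediate 0x31. */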
emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
if (target != d->target)
emit_move_insn (d->target, gen_lowpart (d->vmode, target));
return false;
}
}
+ else if (GET_MODE_SIZE (d->vmode) == 64)
+ {
+ if (!TARGET_AVX512BW)
+ return false;
+ if (vmode == V64QImode)
+ {
+ /* vpshufb only shuffles within a lane; it is not
+ possible to move bytes between lanes. */
+ for (i = 0; i < nelt; ++i)
+ if ((d->perm[i] ^ i) & (nelt / 4))
+ return false;
+ }
+ }
else
return false;
}
mask = 2 * nelt - 1;
else if (vmode == V16QImode)
mask = nelt - 1;
+ else if (vmode == V64QImode)
+ mask = nelt / 4 - 1;
else
mask = nelt / 2 - 1;
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ else if (vmode == V64QImode)
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
else if (vmode == V8SFmode)
emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
else
rtx (*gen) (rtx, rtx) = NULL;
switch (d->vmode)
{
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv64qi;
+ break;
case V32QImode:
gen = gen_avx2_pbroadcastv32qi_1;
break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv32hi;
+ break;
case V16HImode:
gen = gen_avx2_pbroadcastv16hi_1;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16si;
+ break;
case V8SImode:
gen = gen_avx2_pbroadcastv8si_1;
break;
case V8HImode:
gen = gen_avx2_pbroadcastv8hi;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16sf;
+ break;
case V8SFmode:
gen = gen_avx2_vec_dupv8sf_1;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8df;
+ break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8di;
+ break;
/* For other modes prefer other shuffles this function creates. */
default: break;
}
if (expand_vec_perm_pshufb (d))
return true;
+ /* Try the AVX2 vpalignr instruction. */
+ if (expand_vec_perm_palignr (d, true))
+ return true;
+
/* Try the AVX512F vpermi2 instructions. */
- rtx vec[64];
- enum machine_mode mode = d->vmode;
- if (mode == V8DFmode)
- mode = V8DImode;
- else if (mode == V16SFmode)
- mode = V16SImode;
- for (i = 0; i < nelt; ++i)
- vec[i] = GEN_INT (d->perm[i]);
- rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
- if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
+ if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
return true;
return false;
the permutation using the SSSE3 palignr instruction. This succeeds
when all of the elements in PERM fit within one vector and we merely
need to shift them down so that a single vector permutation has a
- chance to succeed. */
+ chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
+ the vpalignr instruction itself can perform the requested permutation. */
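+/* For example: with V8HImode and perm == {3,4,5,6,7,8,9,10}, min == 3 and
+   max - min == 7 < nelt, so one palignr of op1:op0 by three elements
+   produces the requested vector and the residual permutation is the
+   identity, which satisfies the SINGLE_INSN_ONLY_P case. */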
static bool
-expand_vec_perm_palignr (struct expand_vec_perm_d *d)
+expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
unsigned i, nelt = d->nelt;
- unsigned min, max;
- bool in_order, ok;
+ unsigned min, max, minswap, maxswap;
+ bool in_order, ok, swap = false;
rtx shift, target;
struct expand_vec_perm_d dcopy;
- /* Even with AVX, palignr only operates on 128-bit vectors. */
- if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+ /* Even with AVX, palignr only operates on 128-bit vectors;
+ with AVX2, vpalignr operates within each 128-bit lane of a 256-bit vector. */
+ if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+ && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
return false;
- min = nelt, max = 0;
+ min = 2 * nelt;
+ max = 0;
+ minswap = 2 * nelt;
+ maxswap = 0;
for (i = 0; i < nelt; ++i)
{
unsigned e = d->perm[i];
+ unsigned eswap = d->perm[i] ^ nelt;
+ if (GET_MODE_SIZE (d->vmode) == 32)
+ {
+ e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
+ eswap = e ^ (nelt / 2);
+ }
if (e < min)
min = e;
if (e > max)
max = e;
+ if (eswap < minswap)
+ minswap = eswap;
+ if (eswap > maxswap)
+ maxswap = eswap;
+ }
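+ /* If the window [min, max] does not fit directly, retry with the
+ operands swapped: eswap above is the index with the operand bit
+ flipped, so minswap/maxswap describe the swapped-operand window. */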
+ if (min == 0
+ || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
+ {
+ if (d->one_operand_p
+ || minswap == 0
+ || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
+ ? nelt / 2 : nelt))
+ return false;
+ swap = true;
+ min = minswap;
+ max = maxswap;
}
- if (min == 0 || max - min >= nelt)
- return false;
/* Given that we have SSSE3, we know we'll be able to implement the
- single operand permutation after the palignr with pshufb. */
- if (d->testing_p)
+ single operand permutation after the palignr with pshufb for
+ 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
+ first. */
+ if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
return true;
dcopy = *d;
- shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
- target = gen_reg_rtx (TImode);
- emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
- gen_lowpart (TImode, d->op0), shift));
-
- dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
- dcopy.one_operand_p = true;
+ if (swap)
+ {
+ dcopy.op0 = d->op1;
+ dcopy.op1 = d->op0;
+ for (i = 0; i < nelt; ++i)
+ dcopy.perm[i] ^= nelt;
+ }
in_order = true;
for (i = 0; i < nelt; ++i)
{
- unsigned e = dcopy.perm[i] - min;
+ unsigned e = dcopy.perm[i];
+ if (GET_MODE_SIZE (d->vmode) == 32
+ && e >= nelt
+ && (e & (nelt / 2 - 1)) < min)
+ e = e - min - (nelt / 2);
+ else
+ e = e - min;
if (e != i)
in_order = false;
dcopy.perm[i] = e;
}
+ dcopy.one_operand_p = true;
+
+ if (single_insn_only_p && !in_order)
+ return false;
+
+ /* For AVX2, test whether we can permute the result in one instruction. */
+ if (d->testing_p)
+ {
+ if (in_order)
+ return true;
+ dcopy.op1 = dcopy.op0;
+ return expand_vec_perm_1 (&dcopy);
+ }
+
+ shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
+ if (GET_MODE_SIZE (d->vmode) == 16)
+ {
+ target = gen_reg_rtx (TImode);
+ emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
+ gen_lowpart (TImode, dcopy.op0), shift));
+ }
+ else
+ {
+ target = gen_reg_rtx (V2TImode);
+ emit_insn (gen_avx2_palignrv2ti (target,
+ gen_lowpart (V2TImode, dcopy.op1),
+ gen_lowpart (V2TImode, dcopy.op0),
+ shift));
+ }
+
+ dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
/* Test for the degenerate case where the alignment by itself
produces the desired permutation. */
}
ok = expand_vec_perm_1 (&dcopy);
- gcc_assert (ok);
+ gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
return ok;
}
/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
the permutation using the SSE4_1 pblendv instruction. Potentially
- reduces permutaion from 2 pshufb and or to 1 pshufb and pblendv. */
+ reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
static bool
expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
enum machine_mode vmode = d->vmode;
bool ok;
- /* Use the same checks as in expand_vec_perm_blend, but skipping
- AVX and AVX2 as they require more than 2 instructions. */
+ /* Use the same checks as in expand_vec_perm_blend. */
if (d->one_operand_p)
return false;
- if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
+ if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+ ;
+ else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
+ ;
+ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
;
else
return false;
respective lanes and 8 >= 8, but 2 not. */
if (which != 1 && which != 2)
return false;
- if (d->testing_p)
+ if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
return true;
/* First we apply one operand permutation to the part where
dcopy.perm[i] = d->perm[i] & (nelt - 1);
ok = expand_vec_perm_1 (&dcopy);
- gcc_assert (ok);
+ if (GET_MODE_SIZE (vmode) != 16 && !ok)
+ return false;
+ else
+ gcc_assert (ok);
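+ /* For 256-bit modes the sub-permutation above may legitimately fail,
+ so success is only reported once it has been verified. */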
+ if (d->testing_p)
+ return true;
/* Next we put permuted elements into their positions. */
dcopy1 = *d;
unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
unsigned HOST_WIDE_INT contents;
unsigned char remap[2 * MAX_VECT_LEN];
- rtx seq;
+ rtx_insn *seq;
bool ok, same_halves = false;
if (GET_MODE_SIZE (d->vmode) == 16)
dfirst.perm[i] = (i & (nelt2 - 1))
+ ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
+ canonicalize_perm (&dfirst);
ok = expand_vec_perm_1 (&dfirst);
gcc_assert (ok);
/* And dsecond is some single insn shuffle, taking
d->op0 and result of vperm2f128 (if perm < 16) or
d->op1 and result of vperm2f128 (otherwise). */
- dsecond.op1 = dfirst.target;
if (perm >= 16)
- dsecond.op0 = dfirst.op1;
+ dsecond.op0 = dsecond.op1;
+ dsecond.op1 = dfirst.target;
ok = expand_vec_perm_1 (&dsecond);
gcc_assert (ok);
return true;
}
- /* For one operand, the only useful vperm2f128 permutation is 0x10. */
+ /* For one operand, the only useful vperm2f128 permutation is 0x01,
+ i.e. swapping the two 128-bit lanes. */
if (d->one_operand_p)
return false;
}
{
struct expand_vec_perm_d dfirst, dsecond;
unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
- rtx seq;
+ rtx_insn *seq;
bool ok;
rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
if (expand_vec_perm_pshuflw_pshufhw (d))
return true;
- if (expand_vec_perm_palignr (d))
+ if (expand_vec_perm_palignr (d, false))
return true;
if (expand_vec_perm_interleave2 (d))
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
- if (d.vmode == V16SImode || d.vmode == V16SFmode
- || d.vmode == V8DFmode || d.vmode == V8DImode)
- /* All implementable with a single vpermi2 insn. */
- return true;
- if (GET_MODE_SIZE (d.vmode) == 16)
+ switch (d.vmode)
{
+ case V16SFmode:
+ case V16SImode:
+ case V8DImode:
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V8SImode:
+ case V8SFmode:
+ case V4DFmode:
+ case V4DImode:
+ if (TARGET_AVX512VL)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
+ break;
+ case V16HImode:
+ if (TARGET_AVX2)
+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
+ return true;
+ break;
+ case V32QImode:
+ if (TARGET_AVX2)
+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
+ return true;
+ break;
+ case V4SImode:
+ case V4SFmode:
+ case V8HImode:
+ case V16QImode:
/* All implementable with a single vpperm insn. */
if (TARGET_XOP)
return true;
/* All implementable with 2 pshufb + 1 ior. */
if (TARGET_SSSE3)
return true;
+ break;
+ case V2DImode:
+ case V2DFmode:
/* All implementable with shufpd or unpck[lh]pd. */
- if (d.nelt == 2)
- return true;
+ return true;
+ default:
+ return false;
}
/* Extract the values from the vector CST into the permutation
gen_il = gen_avx2_interleave_lowv32qi;
gen_ih = gen_avx2_interleave_highv32qi;
break;
+ case V64QImode:
+ himode = V32HImode;
+ gen_il = gen_avx512bw_interleave_lowv64qi;
+ gen_ih = gen_avx512bw_interleave_highv64qi;
+ break;
default:
gcc_unreachable ();
}
{
/* For SSE2, we used a full interleave, so the desired
results are in the even elements. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2;
}
else
/* For AVX, the interleave used above was not cross-lane. So the
extraction is evens but with the second and third quarter swapped.
Happily, that is even one insn shorter than even extraction. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
}
case V16QImode:
case V32QImode:
+ case V32HImode:
+ case V16SImode:
+ case V64QImode:
t1 = gen_reg_rtx (wmode);
t2 = gen_reg_rtx (wmode);
ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
enum machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_XOP && mode == V2DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
+ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
+ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
+ emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
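+ /* With AVX512DQ (plus AVX512VL for the 256/128-bit widths) the 64-bit
+ element multiply is assumed to map to a single vpmullq instruction,
+ so no widening decomposition is needed; otherwise fall back to the
+ XOP/SSE sequences below. */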
+ else if (TARGET_XOP && mode == V2DImode)
{
/* op1: A,B,C,D, op2: E,F,G,H */
op1 = gen_lowpart (V4SImode, op1);
/* Get dispatch group of insn. */
static enum dispatch_group
-get_mem_group (rtx insn)
+get_mem_group (rtx_insn *insn)
{
enum attr_memory memory;
/* Return true if insn is a compare instruction. */
static bool
-is_cmp (rtx insn)
+is_cmp (rtx_insn *insn)
{
enum attr_type type;
/* Return single or double path for instructions. */
static enum insn_path
-get_insn_path (rtx insn)
+get_insn_path (rtx_insn *insn)
{
enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
/* Return insn dispatch group. */
static enum dispatch_group
-get_insn_group (rtx insn)
+get_insn_group (rtx_insn *insn)
{
enum dispatch_group group = get_mem_group (insn);
if (group)
window WINDOW_LIST. */
static int
-count_num_restricted (rtx insn, dispatch_windows *window_list)
+count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
{
enum dispatch_group group = get_insn_group (insn);
int imm_size;
last window scheduled. */
static bool
-fits_dispatch_window (rtx insn)
+fits_dispatch_window (rtx_insn *insn)
{
dispatch_windows *window_list = dispatch_window_list;
dispatch_windows *window_list_next = dispatch_window_list->next;
dispatch window WINDOW_LIST. */
static void
-add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
+add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
{
int byte_len = min_insn_size (insn);
int num_insn = window_list->num_insn;
the window exceed allowable, it allocates a new window. */
static void
-add_to_dispatch_window (rtx insn)
+add_to_dispatch_window (rtx_insn *insn)
{
int byte_len;
dispatch_windows *window_list;
/* Print INSN dispatch information to FILE. */
DEBUG_FUNCTION static void
-debug_insn_dispatch_info_file (FILE *file, rtx insn)
+debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
{
int byte_len;
enum insn_path path;
/* This routine is the driver of the dispatch scheduler. */
static void
-do_dispatch (rtx insn, int mode)
+do_dispatch (rtx_insn *insn, int mode)
{
if (mode == DISPATCH_INIT)
init_dispatch_sched ();
/* Return TRUE if Dispatch Scheduling is supported. */
static bool
-has_dispatch (rtx insn, int action)
+has_dispatch (rtx_insn *insn, int action)
{
if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
&& flag_dispatch_scheduler)
ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
basic_block *bbs;
- rtx insn;
+ rtx_insn *insn;
unsigned i;
unsigned mem_count = 0;
{
for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
if (NONDEBUG_INSN_P (insn))
- for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
+ for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
+ &mem_count);
}
free (bbs);
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
+#undef TARGET_INIT_PIC_REG
+#define TARGET_INIT_PIC_REG ix86_init_pic_reg
+#undef TARGET_USE_PSEUDO_PIC_REG
+#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
+ ix86_libgcc_floating_mode_supported_p
+
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix