static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
+static void ix86_init_mmx_sse_builtins PARAMS ((void));
struct ix86_address
{
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
-static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
+static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
+ tree, rtx));
+static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
- target_flags |= MASK_MMX;
+ {
+ target_flags |= MASK_MMX;
+ x86_prefetch_sse = true;
+ }
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
if (TARGET_3DNOW)
emit_insn (insn);
}
+void
+ix86_expand_vector_move (mode, operands)
+ enum machine_mode mode;
+ rtx operands[];
+{
+ /* Force constants other than zero into memory.  We do not know how
+ the instructions used to build constants modify the upper 64 bits
+ of the register; once we have that information we may be able
+ to handle some of them more efficiently.  */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], mode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+ emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (mode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], mode)
+ && !register_operand (operands[1], mode)
+ && operands[1] != CONST0_RTX (mode))
+ {
+ rtx temp = force_reg (mode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+}
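+
+/* Usage sketch (illustrative only): with this helper, each vector mov
+   expander in i386.md (see the .md hunks below) reduces to
+
+	ix86_expand_vector_move (V4SFmode, operands); DONE;
+
+   instead of open-coding the constant and memory handling above.  */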
+
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine, then just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
{ MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
{ MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
- { MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
-
{ MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
-void
+static void
ix86_init_mmx_sse_builtins ()
{
const struct builtin_description * d;
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, V8QI_type_node,
endlink));
- tree int_ftype_v2si
- = build_function_type (integer_type_node,
- tree_cons (NULL_TREE, V2SI_type_node,
- endlink));
- tree v2si_ftype_int
- = build_function_type (V2SI_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink));
tree v4sf_ftype_v4sf_int
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
tree_cons (NULL_TREE, integer_type_node,
endlink)));
tree void_ftype_void
= build_function_type (void_type_node, endlink);
- tree void_ftype_pchar_int
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- tree_cons (NULL_TREE, integer_type_node,
- endlink)));
tree void_ftype_unsigned
= build_function_type (void_type_node,
tree_cons (NULL_TREE, unsigned_type_node,
= build_function_type (unsigned_type_node, endlink);
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
- tree ti_ftype_void
- = build_function_type (intTI_type_node, endlink);
+ tree v4sf_ftype_void
+ = build_function_type (V4SF_type_node, endlink);
tree v2si_ftype_v4sf
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, pfloat_type_node,
endlink));
- tree v4sf_ftype_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- endlink));
- tree v4sf_ftype_float_float_float_float
- = build_function_type (V4SF_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE, float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- tree_cons (NULL_TREE,
- float_type_node,
- endlink)))));
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
endlink)));
- tree ti_ftype_ti_ti
- = build_function_type (intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- tree_cons (NULL_TREE, intTI_type_node,
- endlink)));
tree di_ftype_di_di
= build_function_type (long_long_unsigned_type_node,
tree_cons (NULL_TREE, long_long_unsigned_type_node,
V2SF_type_node,
endlink)));
- tree void_ftype_pchar
- = build_function_type (void_type_node,
- tree_cons (NULL_TREE, pchar_type_node,
- endlink));
-
/* Add all builtins that are more or less simple operations on two
operands. */
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case TImode:
- type = ti_ftype_ti_ti;
- break;
case DImode:
type = di_ftype_di_di;
break;
}
/* Add the remaining MMX insns with somewhat more complicated types. */
- def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
- def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+ def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
+
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
- def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
/* 3DNow! extension as used in the Athlon CPU. */
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
- /* Composite intrinsics. */
- def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
- def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
- def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
- def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
- def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
+ def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}
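+
+/* Illustrative example (hypothetical user-level wrapper, in the style
+   of Intel's xmmintrin.h; not part of the patch): with the V4SF
+   signature above, the zeroing builtin is usable without a TImode
+   cast:
+
+	typedef int __v4sf __attribute__ ((mode (V4SF)));
+
+	static __inline __v4sf
+	my_setzero_ps (void)
+	{
+	  return __builtin_ia32_setzerops ();
+	}
+   */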
/* Errors in the source file can cause expand_expr to return const0_rtx
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
- emit_insn (gen_sse_clrti (mode == TImode ? x
- : gen_rtx_SUBREG (TImode, x, 0)));
+ emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
+ : gen_rtx_SUBREG (V4SFmode, x, 0)));
return x;
}
return target;
}
+/* In type_for_mode we restrict the ability to create TImode types
+ to hosts with 64-bit HOST_WIDE_INT.  So we've defined the SSE logicals
+ to have a V4SFmode signature. Convert them in-place to TImode. */
+
+static rtx
+ix86_expand_timode_binop_builtin (icode, arglist, target)
+ enum insn_code icode;
+ tree arglist;
+ rtx target;
+{
+ rtx pat;
+ tree arg0 = TREE_VALUE (arglist);
+ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ op0 = gen_lowpart (TImode, op0);
+ op1 = gen_lowpart (TImode, op1);
+ target = gen_reg_rtx (TImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
+ op0 = copy_to_mode_reg (TImode, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
+ op1 = copy_to_mode_reg (TImode, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+
+ return gen_lowpart (V4SFmode, target);
+}
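+
+/* Illustrative example (hypothetical user-level wrapper; __v4sf as in
+   the sketch above): the TImode detour is invisible to users, since
+   the logical builtins still take and return V4SF values:
+
+	static __inline __v4sf
+	my_and_ps (__v4sf a, __v4sf b)
+	{
+	  return __builtin_ia32_andps (a, b);
+	}
+   */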
+
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
-ix86_expand_store_builtin (icode, arglist, shuffle)
+ix86_expand_store_builtin (icode, arglist)
enum insn_code icode;
tree arglist;
- int shuffle;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
op1 = safe_vector_operand (op1, mode1);
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
- if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (shuffle >= 0)
- emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op1);
if (pat)
emit_insn (pat);
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
- tree arg0, arg1, arg2, arg3;
+ tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
emit_insn (gen_sfence ());
return 0;
- case IX86_BUILTIN_M_FROM_INT:
- target = gen_reg_rtx (DImode);
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
- return target;
-
- case IX86_BUILTIN_M_TO_INT:
- op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
- op0 = copy_to_mode_reg (DImode, op0);
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
- return target;
-
case IX86_BUILTIN_PEXTRW:
icode = CODE_FOR_mmx_pextrw;
arg0 = TREE_VALUE (arglist);
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
+ case IX86_BUILTIN_ANDPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
+ arglist, target);
+ case IX86_BUILTIN_ANDNPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
+ arglist, target);
+ case IX86_BUILTIN_ORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
+ arglist, target);
+ case IX86_BUILTIN_XORPS:
+ return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
+ arglist, target);
+
case IX86_BUILTIN_LOADAPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
case IX86_BUILTIN_STOREAPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
case IX86_BUILTIN_STOREUPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
case IX86_BUILTIN_LOADSS:
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
case IX86_BUILTIN_STORESS:
- return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
case IX86_BUILTIN_LOADHPS:
case IX86_BUILTIN_LOADLPS:
return 0;
case IX86_BUILTIN_MOVNTPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
case IX86_BUILTIN_MOVNTQ:
- return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
+ return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
emit_insn (gen_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
- case IX86_BUILTIN_PREFETCH:
- icode = CODE_FOR_prefetch_sse;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return const0_rtx;
- }
-
- op0 = copy_to_mode_reg (Pmode, op0);
- pat = GEN_FCN (icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
case IX86_BUILTIN_SHUFPS:
icode = CODE_FOR_sse_shufps;
arg0 = TREE_VALUE (arglist);
case IX86_BUILTIN_PMULHRW:
return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
- case IX86_BUILTIN_PREFETCH_3DNOW:
- case IX86_BUILTIN_PREFETCHW:
- icode = CODE_FOR_prefetch_3dnow;
- arg0 = TREE_VALUE (arglist);
- op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
- op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
- mode0 = insn_data[icode].operand[0].mode;
- pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
- if (! pat)
- return NULL_RTX;
- emit_insn (pat);
- return NULL_RTX;
-
case IX86_BUILTIN_PF2IW:
return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
case IX86_BUILTIN_PSWAPDSF:
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
- /* Composite intrinsics. */
- case IX86_BUILTIN_SETPS1:
- target = assign_386_stack_local (SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
- emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
- return op0;
-
- case IX86_BUILTIN_SETPS:
- target = assign_386_stack_local (V4SFmode, 0);
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
- emit_move_insn (adjust_address (target, SFmode, 0),
- expand_expr (arg0, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 4),
- expand_expr (arg1, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 8),
- expand_expr (arg2, NULL_RTX, VOIDmode, 0));
- emit_move_insn (adjust_address (target, SFmode, 12),
- expand_expr (arg3, NULL_RTX, VOIDmode, 0));
- op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_movaps (op0, target));
- return op0;
-
- case IX86_BUILTIN_CLRPS:
- target = gen_reg_rtx (TImode);
- emit_insn (gen_sse_clrti (target));
- return target;
-
- case IX86_BUILTIN_LOADRPS:
- target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
- gen_reg_rtx (V4SFmode), 1);
- emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
+ case IX86_BUILTIN_SSE_ZERO:
+ target = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_clrv4sf (target));
return target;
- case IX86_BUILTIN_LOADPS1:
- target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
- gen_reg_rtx (V4SFmode), 1);
- emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
- return target;
-
- case IX86_BUILTIN_STOREPS1:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
- case IX86_BUILTIN_STORERPS:
- return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
-
case IX86_BUILTIN_MMX_ZERO:
target = gen_reg_rtx (DImode);
emit_insn (gen_mmx_clrdi (target));
;; 32 This is a `maskmov' operation.
;; 33 This is a `movmsk' operation.
;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' (SSE) operation.
;; 36 This is used to distinguish COMISS from UCOMISS.
;; 37 This is a `ldmxcsr' operation.
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
(define_insn "movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SF 1 "general_operand" "xm,x"))]
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SI 1 "general_operand" "xm,x"))]
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V8QI 1 "general_operand" "ym,y"))]
+ (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv4hi_internal"
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V4HI 1 "general_operand" "ym,y"))]
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2si_internal"
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SI 1 "general_operand" "ym,y"))]
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SF 1 "general_operand" "ym,y"))]
+ (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
"TARGET_3DNOW"
"movq\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
- {
- ix86_expand_move (TImode, operands);
- DONE;
- }
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], TImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (TImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], TImode)
- && !register_operand (operands[1], TImode)
- && operands[1] != CONST0_RTX (TImode))
- {
- rtx temp = force_reg (TImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_move (TImode, operands);
+ else
+ ix86_expand_vector_move (TImode, operands);
+ DONE;
})
(define_expand "movv4sf"
(match_operand:V4SF 1 "general_operand" ""))]
"TARGET_SSE"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4SFmode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4SFmode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4SFmode)
- && !register_operand (operands[1], V4SFmode)
- && operands[1] != CONST0_RTX (V4SFmode))
- {
- rtx temp = force_reg (V4SFmode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4SFmode, operands);
+ DONE;
})
(define_expand "movv4si"
(match_operand:V4SI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4SImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4SImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4SImode)
- && !register_operand (operands[1], V4SImode)
- && operands[1] != CONST0_RTX (V4SImode))
- {
- rtx temp = force_reg (V4SImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4SImode, operands);
+ DONE;
})
(define_expand "movv2si"
(match_operand:V2SI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V2SImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V2SImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V2SImode)
- && !register_operand (operands[1], V2SImode)
- && operands[1] != CONST0_RTX (V2SImode))
- {
- rtx temp = force_reg (V2SImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V2SImode, operands);
+ DONE;
})
(define_expand "movv4hi"
(match_operand:V4HI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V4HImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V4HImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V4HImode)
- && !register_operand (operands[1], V4HImode)
- && operands[1] != CONST0_RTX (V4HImode))
- {
- rtx temp = force_reg (V4HImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V4HImode, operands);
+ DONE;
})
(define_expand "movv8qi"
(match_operand:V8QI 1 "general_operand" ""))]
"TARGET_MMX"
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V8QImode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V8QImode, addr);
- }
-
- /* Make operand1 a register if it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V8QImode)
- && !register_operand (operands[1], V8QImode)
- && operands[1] != CONST0_RTX (V8QImode))
- {
- rtx temp = force_reg (V8QImode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
+ ix86_expand_vector_move (V8QImode, operands);
+ DONE;
})
(define_expand "movv2sf"
[(set (match_operand:V2SF 0 "general_operand" "")
(match_operand:V2SF 1 "general_operand" ""))]
"TARGET_3DNOW"
- "
{
- /* For constants other than zero into memory. We do not know how the
- instructions used to build constants modify the upper 64 bits
- of the register, once we have that information we may be able
- to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
- && register_operand (operands[0], V2SFmode)
- && CONSTANT_P (operands[1]))
- {
- rtx addr = gen_reg_rtx (Pmode);
-
- emit_move_insn (addr,
- XEXP (force_const_mem (V2SFmode, operands[1]), 0));
- operands[1] = gen_rtx_MEM (V2SFmode, addr);
- }
-
- /* Make operand1 a register is it isn't already. */
- if ((reload_in_progress | reload_completed) == 0
- && !register_operand (operands[0], V2SFmode)
- && !register_operand (operands[1], V2SFmode)
- && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
- && operands[1] != CONST0_RTX (V2SFmode))
- {
- rtx temp = force_reg (V2SFmode, operands[1]);
- emit_move_insn (operands[0], temp);
- DONE;
- }
-}")
+ ix86_expand_vector_move (V2SFmode, operands);
+ DONE;
+})
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
[(set_attr "type" "mmx")])
(define_insn "movti_internal"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
- (match_operand:TI 1 "general_operand" "xm,x"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
"@
+ xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "*movti_rex64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
+ (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
#
#
+ xorps\t%0, %0
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
- [(set_attr "type" "*,*,sse,sse")
+ [(set_attr "type" "*,*,sse,sse,sse")
(set_attr "mode" "TI")])
(define_split
;; movaps or movups
(define_insn "sse_movaps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
"TARGET_SSE"
"@
movaps\t{%1, %0|%0, %1}
(define_insn "sse_movups"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
"TARGET_SSE"
"@
movups\t{%1, %0|%0, %1}
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
- "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
- "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
- (match_operand:V4SF 2 "register_operand" "0")
- (const_int 1)))]
+ (vec_merge:V4SF
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
-
;; SSE logical operations.
;; These are not called andti3 etc. because we really really don't want
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
-(define_insn "sse_clrti"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (unspec:TI [(const_int 0)] 45))]
+(define_insn "sse_clrv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(const_int 0)] 45))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sse")
(define_insn "maskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
[(set (match_operand:V4SI 0 "register_operand" "=x")
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")])))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
- return \"cmpordps\t{%2, %0|%0, %2}\";
-
- return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
-}"
+ return "cmpordps\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3ps\t{%2, %0|%0, %2}";
+}
[(set_attr "type" "sse")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")])
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
(vec_merge:V4SI
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
- [(match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+ [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")]))
(subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
- "*
{
if (GET_CODE (operands[3]) == UNORDERED)
- return \"cmpordss\t{%2, %0|%0, %2}\";
-
- return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
-}"
+ return "cmpordss\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3ss\t{%2, %0|%0, %2}";
+}
[(set_attr "type" "sse")])
(define_insn "sse_comi"
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
+ (vec_merge:V4SF
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (vec_duplicate:V4SF
- (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
- (const_int 12)))]
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
- (parallel
- [(const_int 0)
- (const_int 1)])))]
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
- (parallel
- [(const_int 0)
- (const_int 1)])))]
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
- (vec_duplicate:V4SF
- (float:SF (match_operand:SI 2 "register_operand" "rm")))
- (const_int 14)))]
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
- (parallel [(const_int 0)])))]
+ (vec_select:SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
- (parallel [(const_int 0)])))]
+ (vec_select:SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
- (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
- (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (mult:V4SI (sign_extend:V4SI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
- (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
- (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (mult:V4SI (zero_extend:V4SI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI
(mult:V2SI
- (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
- (parallel [(const_int 0)
- (const_int 2)])))
- (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
- (parallel [(const_int 0)
- (const_int 2)]))))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 1)
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
-(define_expand "prefetch"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (match_operand:SI 2 "const_int_operand" "n"))]
- "TARGET_PREFETCH_SSE || TARGET_3DNOW"
- "
-{
- int rw = INTVAL (operands[1]);
- int locality = INTVAL (operands[2]);
- if (rw != 0 && rw != 1)
- abort ();
- if (locality < 0 || locality > 3)
- abort ();
- /* Use 3dNOW prefetch in case we are asking for write prefetch not
- suported by SSE counterpart or the SSE prefetch is not available
- (K6 machines). Otherwise use SSE prefetch as it allows specifying
- of locality. */
- if (TARGET_3DNOW
- && (!TARGET_PREFETCH_SSE || rw))
- {
- emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
- }
- else
- {
- int i;
- switch (locality)
- {
- case 0: /* No temporal locality. */
- i = 0;
- break;
- case 1: /* Lowest level of temporal locality. */
- i = 3;
- break;
- case 2: /* Moderate level of temporal locality. */
- i = 2;
- break;
- case 3: /* Highest level of temporal locality. */
- i = 1;
- break;
- default:
- abort (); /* We already checked for valid values above. */
- break;
- }
- emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
- }
- DONE;
-}")
-
-(define_insn "prefetch_sse"
- [(unspec [(match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "immediate_operand" "n")] 35)]
- "TARGET_PREFETCH_SSE"
-{
- switch (INTVAL (operands[1]))
- {
- case 0:
- return "prefetchnta\t%a0";
- case 1:
- return "prefetcht0\t%a0";
- case 2:
- return "prefetcht1\t%a0";
- case 3:
- return "prefetcht2\t%a0";
- default:
- abort ();
- }
-}
- [(set_attr "type" "sse")])
-
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI 21)
"femms"
[(set_attr "type" "mmx")])
-(define_insn "prefetch_3dnow"
- [(prefetch (match_operand:SI 0 "address_operand" "p")
- (match_operand:SI 1 "const_int_operand" "n")
- (const_int 0))]
- "TARGET_3DNOW"
-{
- if (INTVAL (operands[1]) == 0)
- return "prefetch\t%a0";
- else
- return "prefetchw\t%a0";
-}
- [(set_attr "type" "mmx")])
-
(define_insn "pf2id"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
+
+(define_expand "prefetch"
+ [(prefetch (match_operand:SI 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+ if (rw != 0 && rw != 1)
+ abort ();
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ /* Use 3DNow! prefetch in case we are asking for a write prefetch not
+ supported by the SSE counterpart, or when the SSE prefetch is not
+ available (K6 machines).  Otherwise use the SSE prefetch, as it allows
+ specifying the locality.  */
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ operands[2] = GEN_INT (3);
+ else
+ operands[1] = const0_rtx;
+})
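+
+;; Illustrative sketch (assumed RTL and register numbers, for
+;; exposition only): with -msse, __builtin_prefetch (p, 0, 1) expands
+;; through the pattern above to
+;;
+;;	(prefetch (reg:SI 60) (const_int 0) (const_int 1))
+;;
+;; which *prefetch_sse below emits as "prefetcht2".  With -m3dnow and
+;; rw == 1, the locality is normalized to 3 instead, so the insn is
+;; matched by *prefetch_3dnow and emitted as "prefetchw".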
+
+(define_insn "*prefetch_sse"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" "n"))]
+ "TARGET_PREFETCH_SSE"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ if (locality < 0 || locality > 3)
+ abort ();
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")])
+
+(define_insn "*prefetch_3dnow"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")])