(UNSPEC_MPSADBW 138)
(UNSPEC_PHMINPOSUW 139)
(UNSPEC_PTEST 140)
- (UNSPEC_ROUNDP 141)
- (UNSPEC_ROUNDS 142)
+ (UNSPEC_ROUND 141)
; For SSE4.2 support
(UNSPEC_CRC32 143)
})
\f
+(define_insn "sse4_1_round<mode>2" ; scalar round pattern, instantiated per SSEMODEF mode
+ [(set (match_operand:SSEMODEF 0 "register_operand" "=x") ; operand 0: result, constraint "x" register
+ (unspec:SSEMODEF [(match_operand:SSEMODEF 1 "register_operand" "x") ; operand 1: value to round, register only
+ (match_operand:SI 2 "const_0_to_15_operand" "n")] ; operand 2: immediate 0..15; expanders here pass 0x04 (rint), 0x01 (floor), 0x02 (ceil), 0x03 (btrunc)
+ UNSPEC_ROUND))] ; opaque unspec: the rounding is not described in RTL
+ "TARGET_SSE4_1" ; pattern is enabled only for SSE4.1 targets
+ "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" ; presumably roundss/roundsd via the mode suffix (TODO confirm); operands listed in both assembler dialect orders
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "rintxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
- && !optimize_size)"
+ && (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
- && !optimize_size)
- ix86_expand_rint (operand0, operand1);
+ && (TARGET_SSE4_1 || !optimize_size))
+ {
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x04)));
+ else
+ ix86_expand_rint (operand0, operand1);
+ }
else
{
rtx op0 = gen_reg_rtx (XFmode);
&& !flag_trapping_math && !flag_rounding_math
&& !optimize_size"
{
- if ((<MODE>mode != DFmode) || TARGET_64BIT)
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_round (operand0, operand1);
else
ix86_expand_rounddf_32 (operand0, operand1);
DONE;
})
-(define_expand "floordf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "floor<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x01)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, true);
else
ix86_expand_floorceildf_32 (operand0, operand1, true);
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_floor (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "floorsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_floorceil (operand0, operand1, true);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
DONE;
})
-(define_expand "ceildf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "ceil<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x02)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, false);
else
ix86_expand_floorceildf_32 (operand0, operand1, false);
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_ceil (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "ceilsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_floorceil (operand0, operand1, false);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
DONE;
})
-(define_expand "btruncdf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "btrunc<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x03)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_trunc (operand0, operand1);
else
ix86_expand_truncdf_32 (operand0, operand1);
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_trunc (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "btruncsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_trunc (operand0, operand1);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_trunc (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDP))]
+ UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDP))]
+ UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
(vec_merge:V2DF
(unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDS)
+ UNSPEC_ROUND)
(match_operand:V2DF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDS)
+ UNSPEC_ROUND)
(match_operand:V4SF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
(match_operand:SI 3 "register_operand" "d,d,d,d")
(match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPESTR))
- (clobber (match_scratch:SI 5 "=c,c,X,X"))
- (clobber (match_scratch:V16QI 6 "=X,X,Y0,Y0"))]
+ (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 6 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpestri\t{%4, %2, %0|%0, %2, %4}
- pcmpestri\t{%4, %2, %0|%0, %2, %4}
pcmpestrm\t{%4, %2, %0|%0, %2, %4}
- pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
(match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPISTR))
- (clobber (match_scratch:SI 3 "=c,c,X,X"))
- (clobber (match_scratch:V16QI 4 "=X,X,Y0,Y0"))]
+ (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 4 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpistri\t{%2, %1, %0|%0, %1, %2}
- pcmpistri\t{%2, %1, %0|%0, %1, %2}
pcmpistrm\t{%2, %1, %0|%0, %1, %2}
- pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")