+2011-04-16 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/sse.md (sseunpackmode): New mode attribute.
+ (ssepackmode): Ditto.
+ (vec_pack_trunc_<mode>): Macroize expander from
+ vec_pack_trunc_{v8hi,v4si,v2di} using VI248_128 mode iterator.
+ (vec_unpacks_lo_<mode>): Macroize expander from
+ vec_unpacks_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+ (vec_unpacks_hi_<mode>): Macroize expander from
+ vec_unpacks_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+ (vec_unpacku_lo_<mode>): Macroize expander from
+ vec_unpacku_lo_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+ (vec_unpacku_hi_<mode>): Macroize expander from
+ vec_unpacku_hi_{v16qi,v8hi,v4si} using VI124_128 mode iterator.
+ * config/i386/i386.c (ix86_expand_sse_unpack): Merge with
+ ix86_expand_sse4_unpack.
+ * config/i386/i386-protos.h (ix86_expand_sse4_unpack): Remove.
+
2011-04-16 Jan Hubicka <jh@suse.cz>
* cgraphbuild.c: Include ipa-inline.h.
and disregard_inline_limits flags.
(cgrpah_global_info): Remove estimated_stack_size, stack_frame_offset,
time, size, estimated_growth.
- * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary): Update.
+ * ipa-cp.c (ipcp_versionable_function_p, ipcp_generate_summary):
+ Update.
* cgraphunit.c (cgraph_decide_is_function_needed): Use
DECL_DISREGARD_INLINE_LIMITS.
(cgraph_analyze_function): Do not initialize
* lto-cgraph.c (lto_output_node, input_overwrite_node): Do not stream
inlinable, versionable and disregard_inline_limits.
* ipa-inline.c (cgraph_clone_inlined_nodes, cgraph_mark_inline_edge,
- cgraph_check_inline_limits, cgraph_default_inline_p, cgraph_edge_badness,
- update_caller_keys, update_callee_keys, add_new_edges_to_heap): Update.
- (cgraph_decide_inlining_of_small_function): Update; set CIF_FUNCTION_NOT_INLINABLE
- for uninlinable functions.
+ cgraph_check_inline_limits, cgraph_default_inline_p,
+ cgraph_edge_badness, update_caller_keys, update_callee_keys,
+ add_new_edges_to_heap): Update.
+ (cgraph_decide_inlining_of_small_function): Update; set
+ CIF_FUNCTION_NOT_INLINABLE for uninlinable functions.
(cgraph_decide_inlining, cgraph_edge_early_inlinable_p,
cgraph_decide_inlining_incrementally): Update.
- * ipa-inline.h (inline_summary): Add inlinable, versionable, disregard_inline_limits,
- estimated_stack_size, stack_frame_offset, time, size and estimated_growth
- parameters.
+ * ipa-inline.h (inline_summary): Add inlinable, versionable,
+ disregard_inline_limits, estimated_stack_size, stack_frame_offset,
+ time, size and estimated_growth parameters.
(estimate_edge_growth): Update.
(initialize_inline_failed): Declare.
* ipa-split.c: Include ipa-inline.h
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx, rtx);
- rtx se, dest;
+ rtx tmp, dest;
- switch (imode)
+ if (TARGET_SSE4_1)
{
- case V16QImode:
- if (high_p)
- unpack = gen_vec_interleave_highv16qi;
- else
- unpack = gen_vec_interleave_lowv16qi;
- break;
- case V8HImode:
- if (high_p)
- unpack = gen_vec_interleave_highv8hi;
- else
- unpack = gen_vec_interleave_lowv8hi;
- break;
- case V4SImode:
+ rtx (*unpack)(rtx, rtx);
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_sign_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_sign_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
if (high_p)
- unpack = gen_vec_interleave_highv4si;
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ tmp = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ }
else
- unpack = gen_vec_interleave_lowv4si;
- break;
- default:
- gcc_unreachable ();
- }
-
- dest = gen_lowpart (imode, operands[0]);
+ tmp = operands[1];
- if (unsigned_p)
- se = force_reg (imode, CONST0_RTX (imode));
+ emit_insn (unpack (operands[0], tmp));
+ }
else
- se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
- operands[1], pc_rtx, pc_rtx);
-
- emit_insn (unpack (dest, operands[1], se));
-}
+ {
+ rtx (*unpack)(rtx, rtx, rtx);
-/* This function performs the same task as ix86_expand_sse_unpack,
- but with SSE4.1 instructions. */
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
- enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx);
- rtx src, dest;
+ dest = gen_lowpart (imode, operands[0]);
- switch (imode)
- {
- case V16QImode:
if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ tmp = force_reg (imode, CONST0_RTX (imode));
else
- unpack = gen_sse4_1_sign_extendv8qiv8hi2;
- break;
- case V8HImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv4hiv4si2;
- else
- unpack = gen_sse4_1_sign_extendv4hiv4si2;
- break;
- case V4SImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv2siv2di2;
- else
- unpack = gen_sse4_1_sign_extendv2siv2di2;
- break;
- default:
- gcc_unreachable ();
- }
+ tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
- dest = operands[0];
- if (high_p)
- {
- /* Shift higher 8 bytes to lower 8 bytes. */
- src = gen_reg_rtx (imode);
- emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
- gen_lowpart (V1TImode, operands[1]),
- GEN_INT (64)));
+ emit_insn (unpack (dest, operands[1], tmp));
}
- else
- src = operands[1];
-
- emit_insn (unpack (dest, src));
}
/* Expand conditional increment or decrement using adb/sbb instructions.
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse
+ [(SF "sse") (DF "sse2")
+ (V4SF "sse") (V2DF "sse2")
+ (V8SF "avx") (V4DF "avx")])
+
+(define_mode_attr sse2
+ [(V16QI "sse2") (V32QI "avx")
+ (V2DI "sse2") (V4DI "avx")])
+
+(define_mode_attr sse3
+ [(V16QI "sse3") (V32QI "avx")])
+
+(define_mode_attr sse4_1
+ [(V4SF "sse4_1") (V2DF "sse4_1")
+ (V8SF "avx") (V4DF "avx")])
+
+;; Pack/unpack vector modes
+(define_mode_attr sseunpackmode
+ [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+
+(define_mode_attr ssepackmode
+ [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
+
+
;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
-;; Mapping from float mode to required SSE level
-(define_mode_attr sse
- [(SF "sse") (DF "sse2")
- (V4SF "sse") (V2DF "sse2")
- (V8SF "avx") (V4DF "avx")])
-
-(define_mode_attr sse2
- [(V16QI "sse2") (V32QI "avx")
- (V2DI "sse2") (V4DI "avx")])
-
-(define_mode_attr sse3
- [(V16QI "sse3") (V32QI "avx")])
-
-(define_mode_attr sse4_1
- [(V4SF "sse4_1") (V2DF "sse4_1")
- (V8SF "avx") (V4DF "avx")])
-
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "vec_pack_trunc_v8hi"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")
- (match_operand:V8HI 2 "register_operand" "")]
- "TARGET_SSE2"
-{
- rtx op1 = gen_lowpart (V16QImode, operands[1]);
- rtx op2 = gen_lowpart (V16QImode, operands[2]);
- ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
- DONE;
-})
-
-(define_expand "vec_pack_trunc_v4si"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")
- (match_operand:V4SI 2 "register_operand" "")]
- "TARGET_SSE2"
-{
- rtx op1 = gen_lowpart (V8HImode, operands[1]);
- rtx op2 = gen_lowpart (V8HImode, operands[2]);
- ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
- DONE;
-})
-
-(define_expand "vec_pack_trunc_v2di"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V2DI 1 "register_operand" "")
- (match_operand:V2DI 2 "register_operand" "")]
+(define_expand "vec_pack_trunc_<mode>"
+ [(match_operand:<ssepackmode> 0 "register_operand" "")
+ (match_operand:VI248_128 1 "register_operand" "")
+ (match_operand:VI248_128 2 "register_operand" "")]
"TARGET_SSE2"
{
- rtx op1 = gen_lowpart (V4SImode, operands[1]);
- rtx op2 = gen_lowpart (V4SImode, operands[2]);
+ rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
+ rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
DONE;
})
(set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
(set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
-(define_expand "vec_unpacku_hi_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
-
-(define_expand "vec_unpacks_hi_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
-
-(define_expand "vec_unpacku_lo_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
-
-(define_expand "vec_unpacks_lo_v16qi"
- [(match_operand:V8HI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
-
-(define_expand "vec_unpacku_hi_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
-
-(define_expand "vec_unpacks_hi_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
-
-(define_expand "vec_unpacku_lo_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
-
-(define_expand "vec_unpacks_lo_v8hi"
- [(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "register_operand" "")]
- "TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
-
-(define_expand "vec_unpacku_hi_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, true);
- else
- ix86_expand_sse_unpack (operands, true, true);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, false, false); DONE;")
-(define_expand "vec_unpacks_hi_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, true);
- else
- ix86_expand_sse_unpack (operands, false, true);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, false, true); DONE;")
-(define_expand "vec_unpacku_lo_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, true, false);
- else
- ix86_expand_sse_unpack (operands, true, false);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, true, false); DONE;")
-(define_expand "vec_unpacks_lo_v4si"
- [(match_operand:V2DI 0 "register_operand" "")
- (match_operand:V4SI 1 "register_operand" "")]
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<sseunpackmode> 0 "register_operand" "")
+ (match_operand:VI124_128 1 "register_operand" "")]
"TARGET_SSE2"
-{
- if (TARGET_SSE4_1)
- ix86_expand_sse4_unpack (operands, false, false);
- else
- ix86_expand_sse_unpack (operands, false, false);
- DONE;
-})
+ "ix86_expand_sse_unpack (operands, true, true); DONE;")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn_and_split "vec_dup<mode>"
+(define_insn "vec_dup<mode>"
[(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
(vec_duplicate:AVX256MODE24P
(match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
"@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
#"
- "&& reload_completed && REG_P (operands[1])"
- [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
- (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
- "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+;; Post-reload split for a vec_duplicate whose source operand is a register:
+;; first duplicate operand 1 into operand 2 in <avxhalfvecmode>, then build
+;; operand 0 as the vec_concat of operand 2 with itself.  Operand 2 is the
+;; <avxhalfvecmode> view of operand 0's own register (same REGNO).
+(define_split
+ [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
+ (vec_duplicate:AVX256MODE24P
+ (match_operand:<avxscalarmode> 1 "register_operand" "")))]
+ "TARGET_AVX && reload_completed"
+ [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
+ (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));")
+
(define_insn "avx_vbroadcastf128_<mode>"
[(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
(vec_concat:AVX256MODE