From: Uros Bizjak Date: Wed, 28 Mar 2012 21:28:15 +0000 (+0200) Subject: i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2310e4504dd63b79f8062abf1f33321416ad92c8;p=gcc.git i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes. * config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes. (ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls. From-SVN: r185927 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6a7605ef18a..f84e25e2686 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2012-03-28 Uros Bizjak + + * config/i386/i386.c (ix86_modes_tieable_p): Handle 32bit AVX modes. + (ix86_expand_vector_move_misalign): Remove un-needed gen_lowpart calls. + 2012-03-28 Jakub Jelinek PR middle-end/52691 @@ -16,22 +21,20 @@ (loop_optimizer_finalize): If loops are to be preserved only clean up optional loop features. (rtl_loop_done): Forcefully free loops here. - * cgraph.c (cgraph_release_function_body): Forcefully free - loops. + * cgraph.c (cgraph_release_function_body): Forcefully free loops. * cfgexpand.c (expand_gimple_cond): Properly add new basic-blocks to existing loops. (construct_init_block): Likewise. (construct_exit_block): Likewise. (gimple_expand_cfg): Clear LOOP_CLOSED_SSA loop state. Cleanup the CFG after expanding. - * cfgloop.c (verify_loop_structure): Calculate or verify - dominators. If we needed to calculate them, free them afterwards. + * cfgloop.c (verify_loop_structure): Calculate or verify dominators. + If we needed to calculate them, free them afterwards. * tree-pass.h (PROP_loops): New define. * tree-ssa-loop.c (pass_tree_loop_init): Provide PROP_loops. * basic-block.h (CLEANUP_CFG_CHANGED): New. * cfgcleanup.c (merge_blocks_move): Protect loop latches. - (cleanup_cfg): If we did something and have loops around, fix - them up. + (cleanup_cfg): If we did something and have loops around, fix them up. * cse.c (rest_of_handle_cse_after_global_opts): Call cleanup_cfg with CLEANUP_CFG_CHANGED. * cfghooks.c (merge_blocks): If we merge a loop header into @@ -84,8 +87,7 @@ PR target/52737 * config.gcc (tm_file): Remove avr/multilib.h. - * doc/invoke.texi (AVR Options): Adjust - documentation of -mtiny-stack. + * doc/invoke.texi (AVR Options): Adjust documentation of -mtiny-stack. * config/avr/genmultilib.awk: Remove code to generate multilib.h. (BEGIN): Use -msp8 as multilib option instead of -mtiny-stack. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 18172a10825..5029be6c085 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15831,17 +15831,18 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) switch (GET_MODE_SIZE (mode)) { case 16: - /* If we're optimizing for size, movups is the smallest. */ if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); emit_insn (gen_sse_movups (op0, op1)); - return; } - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); + else + { + op0 = gen_lowpart (V16QImode, op0); + op1 = gen_lowpart (V16QImode, op1); + emit_insn (gen_sse2_movdqu (op0, op1)); + } break; case 32: op0 = gen_lowpart (V32QImode, op0); @@ -15853,27 +15854,22 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } break; case MODE_VECTOR_FLOAT: - op0 = gen_lowpart (mode, op0); - op1 = gen_lowpart (mode, op1); - switch (mode) { case V4SFmode: emit_insn (gen_sse_movups (op0, op1)); break; - case V8SFmode: - ix86_avx256_split_vector_move_misalign (op0, op1); - break; case V2DFmode: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); emit_insn (gen_sse_movups (op0, op1)); - return; } - emit_insn (gen_sse2_movupd (op0, op1)); + else + emit_insn (gen_sse2_movupd (op0, op1)); break; + case V8SFmode: case V4DFmode: ix86_avx256_split_vector_move_misalign (op0, op1); break; @@ -15918,8 +15914,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) { - op0 = gen_lowpart (V2DFmode, op0); - op1 = gen_lowpart (V2DFmode, op1); emit_insn (gen_sse2_movupd (op0, op1)); return; } @@ -15984,8 +15978,8 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) return; } - /* ??? Similar to above, only less clear because of quote - typeless stores unquote. */ + /* ??? Similar to above, only less clear + because of typeless stores. */ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) { @@ -15998,11 +15992,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (TARGET_SSE2 && mode == V2DFmode) { if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) - { - op0 = gen_lowpart (V2DFmode, op0); - op1 = gen_lowpart (V2DFmode, op1); - emit_insn (gen_sse2_movupd (op0, op1)); - } + emit_insn (gen_sse2_movupd (op0, op1)); else { m = adjust_address (op0, DFmode, 0); @@ -31399,6 +31389,10 @@ ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) /* If MODE2 is only appropriate for an SSE register, then tie with any other mode acceptable to SSE registers. */ + if (GET_MODE_SIZE (mode2) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) + return (GET_MODE_SIZE (mode1) == 32 + && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); if (GET_MODE_SIZE (mode2) == 16 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) return (GET_MODE_SIZE (mode1) == 16