From: Uros Bizjak Date: Wed, 23 Jun 2010 19:02:42 +0000 (+0200) Subject: i386.md (mov<mode>): Macroize expander from mov{sf,df,xf} using X87MODEF mode iterator. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bc051083b03021f18372f165243117688a023d77;p=gcc.git i386.md (mov<mode>): Macroize expander from mov{sf,df,xf} using X87MODEF mode iterator. * config/i386/i386.md (mov<mode>): Macroize expander from mov{sf,df,xf} using X87MODEF mode iterator. (pushsf splitter): Macroize splitter using P mode iterator. (*swap<mode>): Macroize insn from *swap{sf,df} using MODEF mode iterator. (*movxf_internal): Rename from *movxf_integer. (*movxf_internal_nointeger): Rename from *movxf_nointeger. (*movdf_internal_rex64): Rename from *movdf_integer_rex64. (*movdf_internal): Rename from *movdf_integer. (*movdf_internal_nointeger): Rename from *movdf_nointeger. (*movsf_internal): Rename from *movsf_1. From-SVN: r161287 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dd0855604f9..59ccdfb4ad0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2010-06-23 Uros Bizjak + + * config/i386/i386.md (mov<mode>): Macroize expander from mov{sf,df,xf} + using X87MODEF mode iterator. + (pushsf splitter): Macroize splitter using P mode iterator. + (*swap<mode>): Macroize insn from *swap{sf,df} using MODEF + mode iterator. + + (*movxf_internal): Rename from *movxf_integer. + (*movxf_internal_nointeger): Rename from *movxf_nointeger. + (*movdf_internal_rex64): Rename from *movdf_integer_rex64. + (*movdf_internal): Rename from *movdf_integer. + (*movdf_internal_nointeger): Rename from *movdf_nointeger. + (*movsf_internal): Rename from *movsf_1. + 2010-06-23 Basile Starynkevitch * coretypes.h: (gimple_seq_node_d, gimple_seq_node) @@ -104,7 +119,7 @@ (SWI48x): Ditto. (SWI12): Ditto. (SWI24): Ditto. - + (mov<mode>): Macroize expander from mov{qi,hi,si,di} using SWI1248x mode iterator. 
(*push<mode>2_rex64): Macroize insn from *push{qi,hi,si}_rex64 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ab90d7300cf..1f7369b5964 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2604,24 +2604,140 @@ ;; Floating point move instructions. -(define_expand "movsf" - [(set (match_operand:SF 0 "nonimmediate_operand" "") - (match_operand:SF 1 "general_operand" ""))] +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_expand "mov<mode>" + [(set (match_operand:X87MODEF 0 "nonimmediate_operand" "") + (match_operand:X87MODEF 1 "general_operand" ""))] "" - "ix86_expand_move (SFmode, operands); DONE;") + "ix86_expand_move (<MODE>mode, operands); DONE;") -(define_insn "*pushsf" - [(set (match_operand:SF 0 "push_operand" "=<,<,<") - (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] - "!TARGET_64BIT" +(define_insn "*pushtf" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] + "TARGET_SSE2" { - /* Anything else should be already split before reg-stack. */ - gcc_assert (which_alternative == 1); - return "push{l}\t%1"; + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); } - [(set_attr "type" "multi,push,multi") + [(set_attr "type" "multi") + (set_attr "unit" "sse,*,*") + (set_attr "mode" "TF,SI,SI")]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "TARGET_SSE2 && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "any_fp_register_operand" ""))] + "TARGET_SSE2" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (mem:TF (reg:P SP_REG)) (match_dup 1))] + "") + +(define_insn "*pushxf" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "optimize_function_for_speed_p (cfun)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set_attr "mode" "XF,SI")]) + +;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushxf using integer instructions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references (assuming that any given constant is pushed +;; only once, but this ought to be handled elsewhere). + +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_function_for_size_p (cfun)" +{ + /* This insn should be already split before reg-stack. 
*/ + gcc_unreachable (); +} + [(set_attr "type" "multi") (set_attr "unit" "i387,*,*") - (set_attr "mode" "SF,SI,SF")]) + (set_attr "mode" "XF,SI,SI")]) + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushdf" + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "DF,SI,DF")]) + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. Allow this +;; pattern for optimize_size too. + +(define_insn "*pushdf_nointeger" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] + "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") + (set_attr "mode" "DF,SI,SI,DF")]) + +;; %%% Kill this when call knows how to work this out. 
+(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (match_dup 1))] + "") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") (define_insn "*pushsf_rex64" [(set (match_operand:SF 0 "push_operand" "=X,X,X") @@ -2636,6 +2752,19 @@ (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,DI,SF")]) +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + (define_split [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "memory_operand" ""))] @@ -2649,202 +2778,148 @@ (define_split [(set (match_operand:SF 0 "push_operand" "") (match_operand:SF 1 "any_fp_register_operand" ""))] - "!TARGET_64BIT" - [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) - (set (mem:SF (reg:SI SP_REG)) (match_dup 1))]) - -(define_split - [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "any_fp_register_operand" ""))] - "TARGET_64BIT" - [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) - (set (mem:SF (reg:DI SP_REG)) (match_dup 1))]) + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:SF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (-GET_MODE_SIZE (<MODE>mode));") -(define_insn "*movsf_1" - [(set (match_operand:SF 0 "nonimmediate_operand" - "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") - (match_operand:SF 1 
"general_operand" - "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && (reload_in_progress || reload_completed - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1])) - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], SFmode))" +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (which_alternative) { case 0: case 1: - return output_387_reg_move (insn, operands); - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: - case 4: - return "mov{l}\t{%1, %0|%0, %1}"; - case 5: - if (get_attr_mode (insn) == MODE_TI) - return "%vpxor\t%0, %d0"; - else - return "%vxorps\t%0, %d0"; - case 6: if (get_attr_mode (insn) == MODE_V4SF) return "%vmovaps\t{%1, %0|%0, %1}"; else - return "%vmovss\t{%1, %d0|%d0, %1}"; - case 7: - if (TARGET_AVX) - return REG_P (operands[1]) ? 
"vmovss\t{%1, %0, %0|%0, %0, %1}" - : "vmovss\t{%1, %0|%0, %1}"; + return "%vmovdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; else - return "movss\t{%1, %0|%0, %1}"; - case 8: - return "%vmovss\t{%1, %0|%0, %1}"; - - case 9: case 10: case 14: case 15: - return "movd\t{%1, %0|%0, %1}"; - case 12: case 13: - return "%vmovd\t{%1, %0|%0, %1}"; - - case 11: - return "movq\t{%1, %0|%0, %1}"; - + return "%vpxor\t%0, %d0"; + case 3: + case 4: + return "#"; default: gcc_unreachable (); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") - (const_string "maybe_vex") - (const_string "orig"))) + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") (set (attr "mode") - (cond [(eq_attr "alternative" "3,4,9,10") - (const_string "SI") - (eq_attr "alternative" "5") + (cond [(eq_attr "alternative" "0,2") (if_then_else - (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (ne (symbol_ref "TARGET_SSE2") - (const_int 0))) - (eq (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0))) - (const_string "TI") - (const_string "V4SF")) - /* For architectures resolving dependencies on - whole SSE registers use APS move to break dependency - chains, otherwise use short move to avoid extra work. - - Do the same for architectures resolving dependencies on - the parts. While in DF mode it is better to always handle - just register parts, the SF mode is different due to lack - of instructions to load just part of the register. It is - better to maintain the whole registers in single format - to avoid problems on using packed logical operations. 
*/ - (eq_attr "alternative" "6") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") (const_int 0)) - (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))) (const_string "V4SF") - (const_string "SF")) - (eq_attr "alternative" "11") - (const_string "DI")] - (const_string "SF")))]) + (const_string "TI"))] + (const_string "DI")))]) -(define_insn "*swapsf" - [(set (match_operand:SF 0 "fp_register_operand" "+f") - (match_operand:SF 1 "fp_register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "reload_completed || TARGET_80387" +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movxf_internal" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + "optimize_function_for_speed_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" { - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "SF")]) + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); -(define_expand "movdf" - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "" - "ix86_expand_move (DFmode, operands); DONE;") + case 2: + return standard_80387_constant_opcode (operands[1]); -;; Size of pushdf is 3 (for sub) + 2 (for fstp) + 
memory operand size. -;; Size of pushdf using integer instructions is 2+2*memory operand size -;; On the average, pushdf using integers can be still shorter. Allow this -;; pattern for optimize_size too. + case 3: case 4: + return "#"; -(define_insn "*pushdf_nointeger" - [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] - "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); + default: + gcc_unreachable (); + } } - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*,*") - (set_attr "mode" "DF,SI,SI,DF")]) + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) -(define_insn "*pushdf_integer" - [(set (match_operand:DF 0 "push_operand" "=<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] - "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_internal_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "optimize_function_for_size_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || standard_80387_constant_p (operands[1]) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" { - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*") - (set_attr "mode" "DF,SI,DF")]) + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); -;; %%% Kill this when call knows how to work this out. 
-(define_split - [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "any_fp_register_operand" ""))] - "reload_completed" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) - (set (mem:DF (reg:P SP_REG)) (match_dup 1))] - "") + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) (define_split - [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "reload_completed" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") -;; Moving is usually shorter when only FP registers are used. This separate -;; movdf pattern avoids the use of integer registers for FP operations -;; when optimizing for size. 
- -(define_insn "*movdf_nointeger" +(define_insn "*movdf_internal_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") + "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") (match_operand:DF 1 "general_operand" - "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] - "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && ((optimize_function_for_size_p (cfun) - || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) + "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_function_for_size_p (cfun) - && !memory_operand (operands[0], DFmode) && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || ((optimize_function_for_size_p (cfun) - || !TARGET_MEMORY_MISMATCH_STALL - || reload_in_progress || reload_completed) - && memory_operand (operands[0], DFmode)))" + || memory_operand (operands[0], DFmode))" { switch (which_alternative) { @@ -2858,6 +2933,7 @@ case 3: case 4: return "#"; + case 5: switch (get_attr_mode (insn)) { @@ -2906,34 +2982,22 @@ else return "movsd\t{%1, %0|%0, %1}"; case MODE_V1DF: - if (TARGET_AVX) - { - if (REG_P (operands[0])) - return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovlpd\t{%1, %0|%0, %1}"; - } - else - return "movlpd\t{%1, %0|%0, %1}"; + return "%vmovlpd\t{%1, %d0|%d0, %1}"; case MODE_V2SF: - if (TARGET_AVX) - { - if (REG_P (operands[0])) - return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovlps\t{%1, %0|%0, %1}"; - } - else - return "movlps\t{%1, %0|%0, %1}"; + return "%vmovlps\t{%1, %d0|%d0, %1}"; default: gcc_unreachable (); } + case 9: + case 10: + return "%vmovd\t{%1, %0|%0, %1}"; + default: - gcc_unreachable (); + gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + [(set_attr "type" 
"fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") (set (attr "prefix") (if_then_else (eq_attr "alternative" "0,1,2,3,4") (const_string "orig") @@ -2945,8 +3009,8 @@ (set (attr "mode") (cond [(eq_attr "alternative" "0,1,2") (const_string "DF") - (eq_attr "alternative" "3,4") - (const_string "SI") + (eq_attr "alternative" "3,4,9,10") + (const_string "DI") /* For SSE1, we have many fewer alternatives. */ (eq (symbol_ref "TARGET_SSE2") (const_int 0)) @@ -2993,12 +3057,14 @@ ] (const_string "DF")))]) -(define_insn "*movdf_integer_rex64" +(define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") (match_operand:DF 1 "general_operand" - "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))] - "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && optimize_function_for_speed_p (cfun) + && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) @@ -3024,17 +3090,17 @@ switch (get_attr_mode (insn)) { case MODE_V4SF: - return "%vxorps\t%0, %d0"; + return "xorps\t%0, %0"; case MODE_V2DF: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; + return "xorps\t%0, %0"; else - return "%vxorpd\t%0, %d0"; + return "xorpd\t%0, %0"; case MODE_TI: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; + return "xorps\t%0, %0"; else - return "%vpxor\t%0, %d0"; + return "pxor\t%0, %0"; default: gcc_unreachable (); } @@ -3044,50 +3110,34 @@ switch (get_attr_mode (insn)) { case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; + return "movaps\t{%1, %0|%0, %1}"; case MODE_V2DF: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; + return "movaps\t{%1, %0|%0, %1}"; else - return "%vmovapd\t{%1, %0|%0, %1}"; + return 
"movapd\t{%1, %0|%0, %1}"; case MODE_TI: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; + return "movaps\t{%1, %0|%0, %1}"; else - return "%vmovdqa\t{%1, %0|%0, %1}"; + return "movdqa\t{%1, %0|%0, %1}"; case MODE_DI: - return "%vmovq\t{%1, %0|%0, %1}"; + return "movq\t{%1, %0|%0, %1}"; case MODE_DF: - if (TARGET_AVX) - { - if (REG_P (operands[0]) && REG_P (operands[1])) - return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; - else - return "vmovsd\t{%1, %0|%0, %1}"; - } - else - return "movsd\t{%1, %0|%0, %1}"; + return "movsd\t{%1, %0|%0, %1}"; case MODE_V1DF: - return "%vmovlpd\t{%1, %d0|%d0, %1}"; + return "movlpd\t{%1, %0|%0, %1}"; case MODE_V2SF: - return "%vmovlps\t{%1, %d0|%d0, %1}"; + return "movlps\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } - case 9: - case 10: - return "%vmovd\t{%1, %0|%0, %1}"; - default: gcc_unreachable(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "0,1,2,3,4") - (const_string "orig") - (const_string "maybe_vex"))) + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") (set (attr "prefix_data16") (if_then_else (eq_attr "mode" "V1DF") (const_string "1") @@ -3095,8 +3145,8 @@ (set (attr "mode") (cond [(eq_attr "alternative" "0,1,2") (const_string "DF") - (eq_attr "alternative" "3,4,9,10") - (const_string "DI") + (eq_attr "alternative" "3,4") + (const_string "SI") /* For SSE1, we have many fewer alternatives. */ (eq (symbol_ref "TARGET_SSE2") (const_int 0)) @@ -3143,21 +3193,29 @@ ] (const_string "DF")))]) -(define_insn "*movdf_integer" +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size. 
+ +(define_insn "*movdf_internal_nointeger" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") + "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") (match_operand:DF 1 "general_operand" - "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) - && optimize_function_for_speed_p (cfun) - && TARGET_INTEGER_DFMODE_MOVES + && ((optimize_function_for_size_p (cfun) + || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_function_for_size_p (cfun) + && !memory_operand (operands[0], DFmode) && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || ((optimize_function_for_size_p (cfun) + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" { switch (which_alternative) { @@ -3171,22 +3229,21 @@ case 3: case 4: return "#"; - case 5: switch (get_attr_mode (insn)) { case MODE_V4SF: - return "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; case MODE_V2DF: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; else - return "xorpd\t%0, %0"; + return "%vxorpd\t%0, %d0"; case MODE_TI: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; else - return "pxor\t%0, %0"; + return "%vpxor\t%0, %d0"; default: gcc_unreachable (); } @@ -3196,34 +3253,62 @@ switch (get_attr_mode (insn)) { case MODE_V4SF: - return "movaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; case MODE_V2DF: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "movaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "movapd\t{%1, %0|%0, %1}"; + return "%vmovapd\t{%1, %0|%0, %1}"; case MODE_TI: if 
(TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "movaps\t{%1, %0|%0, %1}"; + return "%vmovaps\t{%1, %0|%0, %1}"; else - return "movdqa\t{%1, %0|%0, %1}"; + return "%vmovdqa\t{%1, %0|%0, %1}"; case MODE_DI: - return "movq\t{%1, %0|%0, %1}"; + return "%vmovq\t{%1, %0|%0, %1}"; case MODE_DF: - return "movsd\t{%1, %0|%0, %1}"; + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; case MODE_V1DF: - return "movlpd\t{%1, %0|%0, %1}"; + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; case MODE_V2SF: - return "movlps\t{%1, %0|%0, %1}"; + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; default: gcc_unreachable (); } default: - gcc_unreachable(); + gcc_unreachable (); } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) (set (attr "prefix_data16") (if_then_else (eq_attr "mode" "V1DF") (const_string "1") @@ -3277,129 +3362,34 @@ (const_string "V1DF") (const_string "DF")) ] - (const_string "DF")))]) - -(define_split - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (match_operand:DF 1 "general_operand" ""))] - "reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && ! (ANY_FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! 
(ANY_FP_REG_P (operands[1]) || - (GET_CODE (operands[1]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_insn "*swapdf" - [(set (match_operand:DF 0 "fp_register_operand" "+f") - (match_operand:DF 1 "fp_register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "reload_completed || TARGET_80387" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "DF")]) - -(define_expand "movxf" - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (match_operand:XF 1 "general_operand" ""))] - "" - "ix86_expand_move (XFmode, operands); DONE;") - -;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer instructions is 3+3*memory operand size -;; Pushing using integer instructions is longer except for constants -;; and direct memory references. -;; (assuming that any given constant is pushed only once, but this ought to be -;; handled elsewhere). - -(define_insn "*pushxf_nointeger" - [(set (match_operand:XF 0 "push_operand" "=X,X,X") - (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] - "optimize_function_for_size_p (cfun)" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*") - (set_attr "mode" "XF,SI,SI")]) - -(define_insn "*pushxf_integer" - [(set (match_operand:XF 0 "push_operand" "=<,<") - (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] - "optimize_function_for_speed_p (cfun)" -{ - /* This insn should be already split before reg-stack. 
*/ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*") - (set_attr "mode" "XF,SI")]) + (const_string "DF")))]) (define_split - [(set (match_operand 0 "push_operand" "") - (match_operand 1 "general_operand" ""))] + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] "reload_completed - && (GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode) - && !ANY_FP_REG_P (operands[1])" + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" [(const_int 0)] "ix86_split_long_move (operands); DONE;") -(define_split - [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "any_fp_register_operand" ""))] - "" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) - (set (mem:XF (reg:P SP_REG)) (match_dup 1))] - "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? 
-16 : -12);") - -;; Do not use integer registers when optimizing for size -(define_insn "*movxf_nointeger" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "optimize_function_for_size_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && (reload_in_progress || reload_completed - || standard_80387_constant_p (operands[1]) - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" -{ - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: case 4: - return "#"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_insn "*movxf_integer" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] - "optimize_function_for_speed_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + (match_operand:SF 1 "general_operand" + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" + || memory_operand (operands[0], SFmode))" { switch (which_alternative) { @@ -3410,112 +3400,79 @@ case 2: return standard_80387_constant_opcode (operands[1]); - case 3: case 4: - return "#"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_expand 
"movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "nonimmediate_operand" ""))] - "TARGET_SSE2" -{ - ix86_expand_move (TFmode, operands); - DONE; -}) - -(define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") - (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] - "TARGET_SSE2 - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" -{ - switch (which_alternative) - { - case 0: - case 1: + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + else + return "%vxorps\t%0, %d0"; + case 6: if (get_attr_mode (insn) == MODE_V4SF) return "%vmovaps\t{%1, %0|%0, %1}"; else - return "%vmovdqa\t{%1, %0|%0, %1}"; - case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vxorps\t%0, %d0"; + return "%vmovss\t{%1, %d0|%d0, %1}"; + case 7: + if (TARGET_AVX) + return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}" + : "vmovss\t{%1, %0|%0, %1}"; else - return "%vpxor\t%0, %d0"; - case 3: - case 4: - return "#"; + return "movss\t{%1, %0|%0, %1}"; + case 8: + return "%vmovss\t{%1, %0|%0, %1}"; + + case 9: case 10: case 14: case 15: + return "movd\t{%1, %0|%0, %1}"; + case 12: case 13: + return "%vmovd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; + default: gcc_unreachable (); } } - [(set_attr "type" "ssemov,ssemov,sselog1,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (const_string "maybe_vex") + (const_string "orig"))) (set (attr "mode") - (cond [(eq_attr "alternative" "0,2") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") (if_then_else - (ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") 
- (const_string "TI")) - (eq_attr "alternative" "1") + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") (if_then_else - (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") (const_int 0)) - (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") (const_int 0))) (const_string "V4SF") - (const_string "TI"))] - (const_string "DI")))]) - -(define_insn "*pushtf_sse" - [(set (match_operand:TF 0 "push_operand" "=<,<,<") - (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] - "TARGET_SSE2" -{ - /* This insn should be already split before reg-stack. 
*/ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "sse,*,*") - (set_attr "mode" "TF,SI,SI")]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "TARGET_SSE2 && reload_completed - && !SSE_REG_P (operands[1])" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "any_fp_register_operand" ""))] - "TARGET_SSE2" - [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) - (set (mem:TF (reg:P SP_REG)) (match_dup 1))] - "") - -(define_split - [(set (match_operand 0 "nonimmediate_operand" "") - (match_operand 1 "general_operand" ""))] - "reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && GET_MODE (operands[0]) == XFmode - && ! (ANY_FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[0])))) - && ! (ANY_FP_REG_P (operands[1]) || - (GET_CODE (operands[1]) == SUBREG - && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_split [(set (match_operand 0 "register_operand" "") @@ -3524,8 +3481,8 @@ && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == SFmode - || GET_MODE (operands[0]) == DFmode) + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) (match_dup 2))] { @@ -3556,8 +3513,8 @@ && MEM_P (operands[1]) && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == SFmode - || GET_MODE (operands[0]) == DFmode) + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) && (operands[2] = find_constant_src (insn))" [(set (match_dup 0) 
(match_dup 2))] { @@ -3581,21 +3538,6 @@ FAIL; }) -(define_insn "swapxf" - [(set (match_operand:XF 0 "register_operand" "+f") - (match_operand:XF 1 "register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_80387" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "XF")]) - ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence (define_split [(set (match_operand:X87MODEF 0 "register_operand" "") @@ -3616,13 +3558,35 @@ operands[1] = CONST1_RTX (<MODE>mode); }) -(define_split - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] - "reload_completed - && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" - [(const_int 0)] - "ix86_split_long_move (operands); DONE;") +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_insn "*swap<mode>" + [(set (match_operand:MODEF 0 "fp_register_operand" "+f") + (match_operand:MODEF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387 || reload_completed" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "<MODE>")]) ;; Zero extension instructions