From 479fecd31e5ba51cefad106c8a91989fe5c88e16 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sat, 14 May 2011 17:33:02 +0200 Subject: [PATCH] constraints.md (Yd, Yx): New register constraints. * config/i386/constraints.md (Yd, Yx): New register constraints. * config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger. Use Yd conditional register constraint. (*movtf_internal): Use standard_sse_constant_opcode. (*movxf_internal): Merge with *movxf_internal_nointeger. Use Yx conditional register constraint. (*movdf_internal): Merge with *movdf_internal_nointeger. Use Yd conditional register constraint. Use standard_sse_constant_p to check for valid SSE constants and call standard_sse_constant_opcode to output SSE insn. (*movsf_internal): Use standard_sse_constant_p to check for valid SSE constants and call standard_sse_constant_opcode to output SSE insn. * config/i386/i386.c (ix86_option_override_internal): Set TARGET_INTEGER_DFMODE_MOVES for 64bit targets. Clear it when optimize_size is set. (standard_sse_constant_opcode): Output conditional AVX insn templates. From-SVN: r173757 --- gcc/ChangeLog | 42 ++++- gcc/config/i386/constraints.md | 12 +- gcc/config/i386/i386.c | 19 +- gcc/config/i386/i386.md | 308 +++++---------------------------- 4 files changed, 109 insertions(+), 272 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9e9ba7fcf34..b5439d5f55f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,41 @@ +2011-05-14 Uros Bizjak + + * config/i386/constraints.md (Yd, Yx): New register constraints. + * config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger. Use + Yd conditional register constraint. + (*movtf_internal): Use standard_sse_constant_opcode. + (*movxf_internal): Merge with *movxf_internal_nointeger. Use + Yx conditional register constraint. + (*movdf_internal): Merge with *movdf_internal_nointeger. Use + Yd conditional register constraint. 
Use standard_sse_constant_p to + check for valid SSE constants and call standard_sse_constant_opcode to + output SSE insn. + (*movsf_internal): Use standard_sse_constant_p to check for valid SSE + constants and call standard_sse_constant_opcode to output SSE insn. + * config/i386/i386.c (ix86_option_override_internal): Set + TARGET_INTEGER_DFMODE_MOVES for 64bit targets. Clear it when + optimize_size is set. + (standard_sse_constant_opcode): Output conditional AVX insn templates. + +2011-05-14 Uros Bizjak + + * config/i386/constraints.md (Yd, Yx): New register constraints. + * config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger. Use + Yd conditional register constraint. + (*movtf_internal): Use standard_sse_constant_opcode. + (*movxf_internal): Merge with *movxf_internal_nointeger. Use + Yx conditional register constraint. + (*movdf_internal): Merge with *movdf_internal_nointeger. Use + Yd conditional register constraint. Use standard_sse_constant_p to + check for valid SSE constants and call standard_sse_constant_opcode to + output SSE insn. + (*movsf_internal): Use standard_sse_constant_p to check for valid SSE + constants and call standard_sse_constant_opcode to output SSE insn. + * config/i386/i386.c (ix86_option_override_internal): Set + TARGET_INTEGER_DFMODE_MOVES for 64bit targets. Clear it when + optimize_size is set. + (standard_sse_constant_opcode): Output conditional AVX insn templates. + 2011-05-14 Tobias Burnus * doc/invoke.texi (-Ofast): Also enables -fstack-arrays. @@ -243,11 +281,11 @@ 2011-05-11 Uros Bizjak * config/i386/i386.c (legitimize_tls_address) - : Call gen_tls_dynamic_gnu2_{32,64} + : Call gen_tls_dynamic_gnu2_{32,64} expanders directly for TARGET_GNU2_TLS. Determine pic and __tls_get_addr symbol reference here. Update call to gen_tls_global_dynamic_{32,64} for added arguments. - : Call gen_tls_dynamic_gnu2_{32,64} + : Call gen_tls_dynamic_gnu2_{32,64} expanders directly for TARGET_GNU2_TLS. 
Determine __tls_get_addr symbol reference here. Update call to gen_tls_local_dynamic_base_{32,64} for added arguments. Attach diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 89722bb92da..ed558b48312 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -90,6 +90,8 @@ ;; 2 SSE2 enabled ;; i SSE2 inter-unit moves enabled ;; m MMX inter-unit moves enabled +;; d Integer register when integer DFmode moves are enabled +;; x Integer register when integer XFmode moves are enabled (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -105,6 +107,14 @@ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS" "@internal Any MMX register, when inter-unit moves are enabled.") +(define_register_constraint "Yd" + "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS" + "@internal Any integer register when integer DFmode moves are enabled.") + +(define_register_constraint "Yx" + "optimize_function_for_speed_p (cfun) ? GENERAL_REGS : NO_REGS" + "@internal Any integer register when integer XFmode moves are enabled.") + ;; Integer constant constraints. (define_constraint "I" "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." @@ -149,7 +159,7 @@ (define_constraint "G" "Standard 80387 floating point constant." (and (match_code "const_double") - (match_test "standard_80387_constant_p (op)"))) + (match_test "standard_80387_constant_p (op) > 0"))) ;; This can theoretically be any mode's CONST0_RTX. (define_constraint "C" diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a23367f553a..84678061d5e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3933,6 +3933,13 @@ ix86_option_override_internal (bool main_args_p) if (!TARGET_80387) target_flags |= MASK_NO_FANCY_MATH_387; + /* On 32bit targets, avoid moving DFmode values in + integer registers when optimizing for size. 
*/ + if (TARGET_64BIT) + target_flags |= TARGET_INTEGER_DFMODE_MOVES; + else if (optimize_size) + target_flags &= ~TARGET_INTEGER_DFMODE_MOVES; + /* Turn on MMX builtins for -msse. */ if (TARGET_SSE) { @@ -8580,17 +8587,17 @@ standard_sse_constant_opcode (rtx insn, rtx x) switch (get_attr_mode (insn)) { case MODE_V4SF: - return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; case MODE_V2DF: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; else - return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; + return "%vxorpd\t%0, %d0"; case MODE_TI: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; + return "%vxorps\t%0, %d0"; else - return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0"; + return "%vpxor\t%0, %d0"; case MODE_V8SF: return "vxorps\t%x0, %x0, %x0"; case MODE_V4DF: @@ -8607,7 +8614,7 @@ standard_sse_constant_opcode (rtx insn, rtx x) break; } case 2: - return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0"; + return "%vpcmpeqd\t%0, %d0"; default: break; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b0c4c83f825..09c9b7a3d00 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2702,10 +2702,14 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. + (define_insn "*pushdf" [(set (match_operand:DF 0 "push_operand" "=<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] - "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" + (match_operand:DF 1 "general_no_elim_operand" "f,Yd*rFo,Y2"))] + "" { /* This insn should be already split before reg-stack. 
*/ gcc_unreachable (); @@ -2714,23 +2718,6 @@ (set_attr "unit" "i387,*,*") (set_attr "mode" "DF,SI,DF")]) -;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer instructions is 2+2*memory operand size -;; On the average, pushdf using integers can be still shorter. Allow this -;; pattern for optimize_size too. - -(define_insn "*pushdf_nointeger" - [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") - (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] - "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)" -{ - /* This insn should be already split before reg-stack. */ - gcc_unreachable (); -} - [(set_attr "type" "multi") - (set_attr "unit" "i387,*,*,*") - (set_attr "mode" "DF,SI,SI,DF")]) - ;; %%% Kill this when call knows how to work this out. (define_split [(set (match_operand:DF 0 "push_operand" "") @@ -2822,14 +2809,14 @@ return "%vmovaps\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; + case 2: - if (get_attr_mode (insn) == MODE_V4SF) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; + return standard_sse_constant_opcode (insn, operands[1]); + case 3: case 4: return "#"; + default: gcc_unreachable (); } @@ -2862,42 +2849,14 @@ "ix86_split_long_move (operands); DONE;") (define_insn "*movxf_internal" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] - "optimize_function_for_speed_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && (!can_create_pseudo_p () - || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], XFmode))" -{ - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: case 4: - return "#"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -;; Do not use integer 
registers when optimizing for size -(define_insn "*movxf_internal_nointeger" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") - (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "optimize_function_for_size_p (cfun) - && !(MEM_P (operands[0]) && MEM_P (operands[1])) + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r ,o") + (match_operand:XF 1 "general_operand" "fm,f,G,Yx*roF,FYx*r"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () - || standard_80387_constant_p (operands[1]) + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1]) > 0) || memory_operand (operands[0], XFmode))" { switch (which_alternative) @@ -2940,10 +2899,12 @@ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!(TARGET_SSE2 && TARGET_SSE_MATH) - && optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && ((!(TARGET_SSE2 && TARGET_SSE_MATH) + && standard_80387_constant_p (operands[1]) > 0) + || (TARGET_SSE2 && TARGET_SSE_MATH + && standard_sse_constant_p (operands[1])))) || memory_operand (operands[0], DFmode))" { switch (which_alternative) @@ -2966,23 +2927,8 @@ return "#"; case 7: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vxorps\t%0, %d0"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vxorpd\t%0, %d0"; - case MODE_TI: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - default: - gcc_unreachable (); - } + return standard_sse_constant_opcode (insn, operands[1]); + case 8: case 9: case 10: @@ -3094,21 +3040,26 @@ ] (const_string "DF")))]) +;; 
Possible store forwarding (partial memory) stall in alternative 4. (define_insn "*movdf_internal" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") + "=f,m,f,Yd*r ,o ,Y2*x,Y2*x,Y2*x,m ") (match_operand:DF 1 "general_operand" - "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "fm,f,G,Yd*roF,FYd*r,C ,Y2*x,m ,Y2*x"))] "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && optimize_function_for_speed_p (cfun) - && TARGET_INTEGER_DFMODE_MOVES && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!(TARGET_SSE2 && TARGET_SSE_MATH) - && optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE - || memory_operand (operands[0], DFmode))" + || (optimize_function_for_size_p (cfun) + && ((!(TARGET_SSE2 && TARGET_SSE_MATH) + && standard_80387_constant_p (operands[1]) > 0) + || (TARGET_SSE2 && TARGET_SSE_MATH + && standard_sse_constant_p (operands[1]))) + && !memory_operand (operands[0], DFmode)) + || ((TARGET_INTEGER_DFMODE_MOVES + || (optimize_function_for_size_p (cfun) + && !TARGET_MEMORY_MISMATCH_STALL)) + && memory_operand (operands[0], DFmode)))" { switch (which_alternative) { @@ -3124,179 +3075,8 @@ return "#"; case 5: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vxorps\t%0, %d0"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vxorpd\t%0, %d0"; - case MODE_TI: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - default: - gcc_unreachable (); - } - case 6: - case 7: - case 8: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; - else - return "%vmovapd\t{%1, %0|%0, %1}"; - case MODE_TI: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, 
%0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_DI: - return "%vmovq\t{%1, %0|%0, %1}"; - case MODE_DF: - if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) - return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; - else - return "%vmovsd\t{%1, %0|%0, %1}"; - case MODE_V1DF: - if (TARGET_AVX && REG_P (operands[0])) - return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; - else - return "%vmovlpd\t{%1, %0|%0, %1}"; - case MODE_V2SF: - if (TARGET_AVX && REG_P (operands[0])) - return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; - else - return "%vmovlps\t{%1, %0|%0, %1}"; - default: - gcc_unreachable (); - } - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") - (set (attr "prefix") - (if_then_else (eq_attr "alternative" "0,1,2,3,4") - (const_string "orig") - (const_string "maybe_vex"))) - (set (attr "prefix_data16") - (if_then_else (eq_attr "mode" "V1DF") - (const_string "1") - (const_string "*"))) - (set (attr "mode") - (cond [(eq_attr "alternative" "0,1,2") - (const_string "DF") - (eq_attr "alternative" "3,4") - (const_string "SI") - - /* For SSE1, we have many fewer alternatives. */ - (eq (symbol_ref "TARGET_SSE2") (const_int 0)) - (cond [(eq_attr "alternative" "5,6") - (const_string "V4SF") - ] - (const_string "V2SF")) - - /* xorps is one byte shorter. */ - (eq_attr "alternative" "5") - (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") - (const_int 0)) - (const_string "TI") - ] - (const_string "V2DF")) + return standard_sse_constant_opcode (insn, operands[1]); - /* For architectures resolving dependencies on - whole SSE registers use APD move to break dependency - chains, otherwise use short move to avoid extra work. - - movaps encodes one byte shorter. 
*/ - (eq_attr "alternative" "6") - (cond - [(ne (symbol_ref "optimize_function_for_size_p (cfun)") - (const_int 0)) - (const_string "V4SF") - (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") - (const_int 0)) - (const_string "V2DF") - ] - (const_string "DF")) - /* For architectures resolving dependencies on register - parts we may avoid extra work to zero out upper part - of register. */ - (eq_attr "alternative" "7") - (if_then_else - (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") - (const_int 0)) - (const_string "V1DF") - (const_string "DF")) - ] - (const_string "DF")))]) - -;; Moving is usually shorter when only FP registers are used. This separate -;; movdf pattern avoids the use of integer registers for FP operations -;; when optimizing for size. - -(define_insn "*movdf_internal_nointeger" - [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") - (match_operand:DF 1 "general_operand" - "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))] - "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && (optimize_function_for_size_p (cfun) - || !TARGET_INTEGER_DFMODE_MOVES) - && (!can_create_pseudo_p () - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!(TARGET_SSE2 && TARGET_SSE_MATH) - && optimize_function_for_size_p (cfun) - && !memory_operand (operands[0], DFmode) - && standard_80387_constant_p (operands[1])) - || GET_CODE (operands[1]) != CONST_DOUBLE - || ((optimize_function_for_size_p (cfun) - || !TARGET_MEMORY_MISMATCH_STALL) - && memory_operand (operands[0], DFmode)))" -{ - switch (which_alternative) - { - case 0: - case 1: - return output_387_reg_move (insn, operands); - - case 2: - return standard_80387_constant_opcode (operands[1]); - - case 3: - case 4: - return "#"; - - case 5: - switch (get_attr_mode (insn)) - { - case MODE_V4SF: - return "%vxorps\t%0, %d0"; - case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vxorpd\t%0, %d0"; - case MODE_TI: - if 
(TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vxorps\t%0, %d0"; - else - return "%vpxor\t%0, %d0"; - default: - gcc_unreachable (); - } case 6: case 7: case 8: @@ -3421,9 +3201,12 @@ "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) - && standard_80387_constant_p (operands[1])) || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && ((!TARGET_SSE_MATH + && standard_80387_constant_p (operands[1]) > 0) + || (TARGET_SSE_MATH + && standard_sse_constant_p (operands[1])))) || memory_operand (operands[0], SFmode))" { switch (which_alternative) @@ -3438,11 +3221,10 @@ case 3: case 4: return "mov{l}\t{%1, %0|%0, %1}"; + case 5: - if (get_attr_mode (insn) == MODE_TI) - return "%vpxor\t%0, %d0"; - else - return "%vxorps\t%0, %d0"; + return standard_sse_constant_opcode (insn, operands[1]); + case 6: if (get_attr_mode (insn) == MODE_V4SF) return "%vmovaps\t{%1, %0|%0, %1}"; -- 2.30.2