From 7fb1431bfa30ac9d85fb614f786c8076ff407673 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 21 Mar 2008 21:43:12 +0100 Subject: [PATCH] re PR target/13958 (Conversion from unsigned to double is painfully slow on P4) PR target/13958 * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with corresponding post-reload splitters. ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern when x87 FP math is selected. * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse): New function prototype. * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New unreachable function to ease macroization of insn patterns. From-SVN: r133435 --- gcc/ChangeLog | 12 ++++++ gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c | 8 ++++ gcc/config/i386/i386.md | 73 ++++++++++++++++++++++++++++++++--- 4 files changed, 89 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2f4c0768ef1..3d2ad8ff026 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2008-03-21 Uros Bizjak + + PR target/13958 + * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with + corresponding post-reload splitters. + ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern + when x87 FP math is selected. + * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse): + New function prototype. + * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New + unreachable function to ease macroization of insn patterns. 
+ 2008-03-21 Martin Jambor * tree-data-ref.c (dump_data_dependence_relation): Avoid data diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 8dd203ebee6..ef2e0ff5db0 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -91,6 +91,7 @@ extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx); extern void ix86_split_convert_uns_si_sse (rtx[]); extern void ix86_expand_convert_uns_didf_sse (rtx, rtx); +extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx); extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx); extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx); extern void ix86_expand_convert_sign_didf_sse (rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5bb5494e27f..8ddfa9f183b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10903,6 +10903,14 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) ix86_expand_vector_extract (false, target, fp_xmm, 0); } +/* Not used, but eases macroization of patterns. */ +void +ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, + rtx input ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} + /* Convert an unsigned SImode value into a DFmode. Only currently used for SSE, but applicable anywhere. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4d4978d778b..8b0a2803c11 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5313,13 +5313,76 @@ DONE; }) +;; Avoid store forwarding (partial memory) stall penalty by extending +;; SImode value to DImode through XMM register instead of pushing two +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; targets benefit from this optimization. Also note that fild +;; loads from memory only. 
+ (define_insn "*floatunssi<mode>2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] +{ + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); +}) + (define_expand "floatunssi<mode>2" - [(use (match_operand:MODEF 0 "register_operand" "")) - (use (match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && TARGET_SSE) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" { - ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); - DONE; + if (SSE_FLOAT_MODE_P 
(<MODE>mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); + DONE; + } + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (DImode, slot); + } }) (define_expand "floatunsdisf2" -- 2.30.2