re PR target/13958 (Conversion from unsigned to double is painfully slow on P4)
author Uros Bizjak <ubizjak@gmail.com>
Fri, 21 Mar 2008 20:43:12 +0000 (21:43 +0100)
committer Uros Bizjak <uros@gcc.gnu.org>
Fri, 21 Mar 2008 20:43:12 +0000 (21:43 +0100)
        PR target/13958
        * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
        corresponding post-reload splitters.
        ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
        when x87 FP math is selected.
        * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
        New function prototype.
        * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
        unreachable function to ease macroization of insn patterns.

From-SVN: r133435

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/i386.md

index 2f4c0768ef164032ace6a92eea33981d05ae9e73..3d2ad8ff0269e946897dcf9a8aefa8153ce7733f 100644 (file)
@@ -1,3 +1,15 @@
+2008-03-21  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/13958
+       * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
+       corresponding post-reload splitters.
+       ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
+       when x87 FP math is selected.
+       * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
+       New function prototype.
+       * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
+       unreachable function to ease macroization of insn patterns.
+
 2008-03-21  Martin Jambor  <mjambor@suse.cz>
 
        * tree-data-ref.c (dump_data_dependence_relation): Avoid data
index 8dd203ebee60a4f366830f6ebfdc572f4448fc44..ef2e0ff5db0c857c6d3e460d1f16c90035d64db2 100644 (file)
@@ -91,6 +91,7 @@ extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
 extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
 extern void ix86_split_convert_uns_si_sse (rtx[]);
 extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
 extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
index 5bb5494e27f766e9f1469813a1bd60746e512979..8ddfa9f183b28c5df331704dcc57917c48c5bad4 100644 (file)
@@ -10903,6 +10903,14 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
   ix86_expand_vector_extract (false, target, fp_xmm, 0);
 }
 
+/* Not used, but eases macroization of patterns.  The "floatunssi<mode>2"
+   expander in i386.md names its helper ix86_expand_convert_uns_si<mode>_sse,
+   so a symbol has to exist for every mode that pattern is instantiated
+   with -- presumably including XFmode; confirm against the X87MODEF
+   iterator.  Reaching this function is a bug, hence gcc_unreachable.
+   TARGET and INPUT are intentionally unused.  */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+                                 rtx input ATTRIBUTE_UNUSED)
+{
+  gcc_unreachable ();
+}
+
 /* Convert an unsigned SImode value into a DFmode.  Only currently used
    for SSE, but applicable anywhere.  */
 
index 4d4978d778b0f0d842810a472c608fd13ba75d3e..8b0a2803c11de95662b6c4a340b019fdec0a582b 100644 (file)
   DONE;
 })
 
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization. Also note that fild
+;; loads from memory only.
+;;
+;; Operand 1 is the unsigned SImode source (XMM register or memory),
+;; operand 2 is a DImode memory temporary and operand 3 is an SImode
+;; scratch that is an XMM register only for the memory alternative.
+;; The "#" template means no assembly is emitted for this insn
+;; directly; it is rewritten by the splitters below once reload has
+;; completed.
+
+(define_insn "*floatunssi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:SI 3 "=X,x"))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+;; Register source: operand 1 is re-expressed as a DImode subreg and
+;; stored to the DImode memory temporary (operand 2), which is then
+;; converted with a signed float from memory.
+;; NOTE(review): presumably relies on the upper 32 bits of the DImode
+;; view of the source register being zero -- confirm.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0)
+       (float:X87MODEF (match_dup 2)))]
+  "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+;; Memory source: the SImode value is first loaded into the scratch
+;; (operand 3) by an explicit move emitted from the preparation code;
+;; the scratch is then viewed as DImode, stored to the DImode memory
+;; temporary (operand 2) and converted with a signed float from there.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (float:X87MODEF (match_dup 2)))]
+{
+  emit_move_insn (operands[3], operands[1]);
+  operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
 (define_expand "floatunssi<mode>2"
-  [(use (match_operand:MODEF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
-  "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  [(parallel
+     [(set (match_operand:X87MODEF 0 "register_operand" "")
+          (unsigned_float:X87MODEF
+            (match_operand:SI 1 "nonimmediate_operand" "")))
+      (clobber (match_dup 2))
+      (clobber (match_scratch:SI 3 ""))])]
+  "!TARGET_64BIT
+   && ((TARGET_80387 && TARGET_SSE)
+       || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
 {
-  ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
-  DONE;
+  /* With SSE math the conversion is expanded entirely by the helper,
+     and DONE suppresses emission of the pattern above.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    {
+      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+      DONE;
+    }
+  else
+    {
+      /* x87 path: supply the DImode stack temporary used as operand 2
+         and fall through so that the parallel pattern above is emitted.
+         The slot kind depends on whether virtual registers have already
+         been instantiated.  */
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (DImode, slot);
+    }
 })
 
 (define_expand "floatunsdisf2"