i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): Emit gen_floatdi<X87MODEF:mode...
author Uros Bizjak <ubizjak@gmail.com>
Wed, 2 Apr 2008 19:07:27 +0000 (21:07 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Wed, 2 Apr 2008 19:07:27 +0000 (21:07 +0200)
        * config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
        Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
        in 32bit mode when XMM registers are available to avoid store
        forwarding stalls.
        (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
        corresponding post-reload splitters.

From-SVN: r133845

gcc/ChangeLog
gcc/config/i386/i386.md

index 703fd01589aa002a40c8eb8b6e988b568b5b7f7c..984ff05559c762335cbce2ddcb10d99d79b5b720 100644 (file)
@@ -1,3 +1,12 @@
+2008-04-02  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
+       Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
+       in 32bit mode when XMM registers are available to avoid store
+       forwarding stalls.
+       (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
+       corresponding post-reload splitters.
+
 2008-04-02  H.J. Lu  <hongjiu.lu@intel.com>
 
        * config/i386/i386.c (bdesc_sse_3arg): Add __builtin_ia32_shufps
index e2d68bb8209d17b10452fa5fcc6ef590efe6a2b3..adeafc2f3f5ed5cb836b6f48c0647e4a61921cc5 100644 (file)
   "&& 1"
   [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
              (clobber (match_dup 2))])]
-  "operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);")
+{
+  operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+  /* Avoid store forwarding (partial memory) stall penalty
+     by passing DImode value through XMM registers.  */
+  if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT 
+      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES 
+      && !optimize_size)
+    {
+      emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+                                                           operands[1],
+                                                           operands[2]));
+      DONE;
+    }
+})
 
 (define_insn "*floatsi<mode>2_vector_mixed_with_temp"
   [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
   "")
 
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing DImode value through XMM registers.
+
+;; Convert a DImode integer (operand 1, in memory or in a general
+;; register) to an X87MODEF value in an x87 register (operand 0).
+;; Operands 3 and 4 are V4SI scratch registers ("x" constraint) and
+;; operand 2 is a DImode memory operand; all three are clobbered.
+;; The output template is "#": no assembly is emitted from this pattern
+;; directly, so it relies on being split into other insns.
+;; The pattern is only enabled for 32-bit targets with both x87 and SSE2,
+;; when inter-unit moves are allowed and not optimizing for size.
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF
+         (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=&x,x"))
+   (clobber (match_scratch:V4SI 4 "=&x,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+;; Splitter for the case where the DImode source (operand 1) is in a
+;; register.  It applies only after reload and only when operand 0 is an
+;; x87 register (FP_REG_P).  The preparation code assembles the 64-bit
+;; value in scratch register 3; the replacement pattern then stores that
+;; register to the DImode memory operand 2 and converts from that memory.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+   (clobber (match_operand:V4SI 3 "register_operand" ""))
+   (clobber (match_operand:V4SI 4 "register_operand" ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+     Assemble the 64-bit DImode value in an xmm register.  */
+  /* The SImode piece at byte offset 0 goes to scratch 3 and the piece at
+     byte offset 4 goes to scratch 4; the two scratches are then combined
+     into scratch 3.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+                             gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+                             gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+
+  /* Re-express scratch 3 as a DImode register with the same register
+     number, so the replacement pattern above can store it to the DImode
+     memory operand 2.  */
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+;; Splitter for the case where the DImode source (operand 1) is already
+;; in memory.  It applies only after reload and only when operand 0 is an
+;; x87 register (FP_REG_P).  The replacement is a plain float conversion
+;; from operand 1, with all three clobbers dropped.
+;; Note the operand numbering in this splitter: the V4SI registers are
+;; operands 2 and 3, and the DImode memory clobber is operand 4.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+   (clobber (match_operand:V4SI 2 "register_operand" ""))
+   (clobber (match_operand:V4SI 3 "register_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && !optimize_size
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two
 ;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES