i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern.
author Uros Bizjak <ubizjak@gmail.com>
Sun, 9 Sep 2018 21:32:08 +0000 (23:32 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Sun, 9 Sep 2018 21:32:08 +0000 (23:32 +0200)
* config/i386/i386.md (float partial SSE register stall splitter): Move
splitter near its instruction pattern.
(float_extend partial SSE register stall splitter): Ditto.
(float_truncate partial SSE register stall splitter): Ditto.

From-SVN: r264185

gcc/ChangeLog
gcc/config/i386/i386.md

index a29b69ccfcf7f93997a37c1f6f0392b6aa28db5f..6dbe8147b3ecbcba71b92a94c16d67e7a4adef4e 100644 (file)
@@ -1,3 +1,10 @@
+2018-09-09  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (float partial SSE register stall splitter): Move
+       splitter near its instruction pattern.
+       (float_extend partial SSE register stall splitter): Ditto.
+       (float_truncate partial SSE register stall splitter): Ditto.
+
 2018-09-09  Hans-Peter Nilsson  <hp@bitrange.com>
 
        PR target/86794
index 0ee2d91414a242015241fc8b0e04a825ad079036..059ddbd6c9710489ac29bdf635c874aeda7a6691 100644 (file)
     }
 })
 
+(define_insn "*extendsfdf2"
+  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+        (float_extend:DF
+         (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "SF,XF,DF")
+   (set (attr "enabled")
+     (if_then_else
+       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
+       (if_then_else
+        (eq_attr "alternative" "0,1")
+        (symbol_ref "TARGET_MIX_SSE_I387")
+        (symbol_ref "true"))
+       (if_then_else
+        (eq_attr "alternative" "0,1")
+        (symbol_ref "true")
+        (symbol_ref "false"))))])
+
 /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
    cvtss2sd:
       unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
    (set (match_dup 0) (float_extend:DF (match_dup 2)))]
   "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
 
-(define_insn "*extendsfdf2"
-  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
+;; Break partial reg stall for cvtss2sd.  This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+  [(set (match_operand:DF 0 "sse_reg_operand")
         (float_extend:DF
-         (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+          (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && (!REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))
+   && (!EXT_REX_SSE_REG_P (operands[0])
+       || TARGET_AVX512VL)"
+  [(set (match_dup 0)
+        (vec_merge:V2DF
+         (vec_duplicate:V2DF
+           (float_extend:DF
+             (match_dup 1)))
+         (match_dup 0)
+          (const_int 1)))]
 {
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,ssecvt")
-   (set_attr "prefix" "orig,orig,maybe_vex")
-   (set_attr "mode" "SF,XF,DF")
-   (set (attr "enabled")
-     (if_then_else
-       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
-       (if_then_else
-        (eq_attr "alternative" "0,1")
-        (symbol_ref "TARGET_MIX_SSE_I387")
-        (symbol_ref "true"))
-       (if_then_else
-        (eq_attr "alternative" "0,1")
-        (symbol_ref "true")
-        (symbol_ref "false"))))])
+  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
+  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
 
 (define_expand "extend<mode>xf2"
   [(set (match_operand:XF 0 "nonimmediate_operand")
    (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
   "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
 
+;; Break partial reg stall for cvtsd2ss.  This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
+
+(define_split
+  [(set (match_operand:SF 0 "sse_reg_operand")
+        (float_truncate:SF
+         (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && (!REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))
+   && (!EXT_REX_SSE_REG_P (operands[0])
+       || TARGET_AVX512VL)"
+  [(set (match_dup 0)
+       (vec_merge:V4SF
+         (vec_duplicate:V4SF
+           (float_truncate:SF
+             (match_dup 1)))
+         (match_dup 0)
+         (const_int 1)))]
+{
+  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
+  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
+
 ;; Conversion from XFmode to {SF,DF}mode
 
 (define_insn "truncxf<mode>2"
   DONE;
 })
 
-;; Avoid partial SSE register dependency stalls.  This splitter should split
-;; late in the pass sequence (after register rename pass), so allocated
-;; registers won't change anymore
-
-(define_split
-  [(set (match_operand:MODEF 0 "sse_reg_operand")
-       (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
-   && optimize_function_for_speed_p (cfun)
-   && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL)"
-  [(set (match_dup 0)
-       (vec_merge:<MODEF:ssevecmode>
-         (vec_duplicate:<MODEF:ssevecmode>
-           (float:MODEF
-             (match_dup 1)))
-         (match_dup 0)
-         (const_int 1)))]
-{
-  const machine_mode vmode = <MODEF:ssevecmode>mode;
-
-  operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
-  emit_move_insn (operands[0], CONST0_RTX (vmode));
-})
-
-;; Break partial reg stall for cvtsd2ss.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
-  [(set (match_operand:SF 0 "sse_reg_operand")
-        (float_truncate:SF
-         (match_operand:DF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
-   && optimize_function_for_speed_p (cfun)
-   && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
-   && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL)"
-  [(set (match_dup 0)
-       (vec_merge:V4SF
-         (vec_duplicate:V4SF
-           (float_truncate:SF
-             (match_dup 1)))
-         (match_dup 0)
-         (const_int 1)))]
-{
-  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
-  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
-})
-
-;; Break partial reg stall for cvtss2sd.  This splitter should split
-;; late in the pass sequence (after register rename pass),
-;; so allocated registers won't change anymore.
-
-(define_split
-  [(set (match_operand:DF 0 "sse_reg_operand")
-        (float_extend:DF
-          (match_operand:SF 1 "nonimmediate_operand")))]
-  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
-   && optimize_function_for_speed_p (cfun)
-   && (!REG_P (operands[1])
-       || REGNO (operands[0]) != REGNO (operands[1]))
-   && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL)"
-  [(set (match_dup 0)
-        (vec_merge:V2DF
-         (vec_duplicate:V2DF
-           (float_extend:DF
-             (match_dup 1)))
-         (match_dup 0)
-          (const_int 1)))]
-{
-  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
-  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
-})
-
 ;; Avoid store forwarding (partial memory) stall penalty
 ;; by passing DImode value through XMM registers.  */
 
    (set_attr "unit" "i387")
    (set_attr "fp_int_src" "true")])
 
+;; Avoid partial SSE register dependency stalls.  This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore
+
+(define_split
+  [(set (match_operand:MODEF 0 "sse_reg_operand")
+       (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
+  "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && (!EXT_REX_SSE_REG_P (operands[0])
+       || TARGET_AVX512VL)"
+  [(set (match_dup 0)
+       (vec_merge:<MODEF:ssevecmode>
+         (vec_duplicate:<MODEF:ssevecmode>
+           (float:MODEF
+             (match_dup 1)))
+         (match_dup 0)
+         (const_int 1)))]
+{
+  const machine_mode vmode = <MODEF:ssevecmode>mode;
+
+  operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
+  emit_move_insn (operands[0], CONST0_RTX (vmode));
+})
+
 (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
   [(set (match_operand:MODEF 0 "register_operand")
        (unsigned_float:MODEF