[multiple changes]
authorUros Bizjak <uros@gcc.gnu.org>
Tue, 20 Nov 2012 18:02:36 +0000 (19:02 +0100)
committerUros Bizjak <uros@gcc.gnu.org>
Tue, 20 Nov 2012 18:02:36 +0000 (19:02 +0100)
2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

* config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize
insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator.
(peephole2 to avoid vector decoded forms): Macroize peephole2
using MODEF mode iterator.  Use SWI48 mode iterator instead of SWI48x.

2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

PR target/19398
* config/i386/i386.md
(peephole2 to shorten x87->SSE reload sequences): Remove peephole2.
* config/i386/i386.h (enum ix86_tune_indices)
<X86_TUNE_SHORTEN_X87_SSE>: Remove.
* config/i386/i386.c (initial_ix86_tune_features): Update.

2012-11-20  Vladimir Makarov  <vmakarov@redhat.com>

PR target/19398
* lra-constraints.c (process_alt_operands): Discourage reloads
through secondary memory.

testsuite/ChangeLog:

2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

PR target/19398
* gcc.target/i386/pr19398.c: New test.

From-SVN: r193671

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/lra-constraints.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr19398.c [new file with mode: 0644]

index e022c8a839ac302a3f81047e41447ca43c8b64fa..cc6441c657bd338de1cbfa900dd0680aff0b81a3 100644 (file)
@@ -1,3 +1,25 @@
+2012-11-20  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize
+       insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator.
+       (peephole2 to avoid vector decoded forms): Macroize peephole2
+       using MODEF mode iterator.  Use SWI48 mode iterator instead of SWI48x.
+
+2012-11-20  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/19398
+       * config/i386/i386.md
+       (peephole2 to shorten x87->SSE reload sequences): Remove peephole2.
+       * config/i386/i386.h (enum ix86_tune_indices)
+       <X86_TUNE_SHORTEN_X87_SSE>: Remove.
+       * config/i386/i386.c (initial_ix86_tune_features): Update.
+
+2012-11-20  Vladimir Makarov  <vmakarov@redhat.com>
+
+       PR target/19398
+       * lra-constraints.c (process_alt_operands): Discourage reloads
+       through secondary memory.
+
 2012-11-20  David Edelsohn  <dje.gcc@gmail.com>
 
        * config/rs6000/rs6000.md (largetoc_low): Revert.
@@ -48,8 +70,7 @@
        (v850_function_arg_advance): Likewise.
        (v850_print_operand): Handle CONST_INT and CONST_DOUBLE.
        (compute_register_save_size): Use df_regs_ever_live_p.
-       (increment_stack): Mark prologue adjustments as being frame
-       related.
+       (increment_stack): Mark prologue adjustments as being frame related.
        (expand_prologue): Handle pretend args.  Mark insns generated as
        being frame related.
        (expand_epilogue): Likewise.
index 8ce32be07bfbae2db42bbfd5c2bdf2229aefbdd3..fc757718d18199c297bdfba9f2568422fc130d25 100644 (file)
@@ -1855,9 +1855,6 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_TUNE_EXT_80387_CONSTANTS */
   m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
 
-  /* X86_TUNE_SHORTEN_X87_SSE */
-  ~m_K8,
-
   /* X86_TUNE_AVOID_VECTOR_DECODE */
   m_CORE2I7_64 | m_K8 | m_GENERIC64,
 
index 0cdbee1e862672714ba2d882f0100c4c1e14792e..ef626835f4aa463db7218536c4e859b9f828860e 100644 (file)
@@ -314,7 +314,6 @@ enum ix86_tune_indices {
   X86_TUNE_PAD_RETURNS,
   X86_TUNE_PAD_SHORT_FUNCTION,
   X86_TUNE_EXT_80387_CONSTANTS,
-  X86_TUNE_SHORTEN_X87_SSE,
   X86_TUNE_AVOID_VECTOR_DECODE,
   X86_TUNE_PROMOTE_HIMODE_IMUL,
   X86_TUNE_SLOW_IMUL_IMM32_MEM,
@@ -408,7 +407,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
 #define TARGET_EXT_80387_CONSTANTS \
        ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
-#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
 #define TARGET_AVOID_VECTOR_DECODE \
        ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
 #define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
index aa75d6b125ebdbf2d30b39330b25bddcb4821202..05449ba3c60c4271804b1bdee996e048159497cc 100644 (file)
   "operands[2] = gen_reg_rtx (SImode);")
 
 ;; When SSE is available, it is always faster to use it!
-(define_insn "fix_trunc<mode>di_sse"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
-  "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+       (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "%vcvtt<ssemodesuffix>2si{q}\t{%1, %0|%0, %1}"
+  "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "prefix_rex" "1")
-   (set_attr "mode" "<MODE>")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "bdver1_decode" "double,double")])
-
-(define_insn "fix_trunc<mode>si_sse"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
-  "SSE_FLOAT_MODE_P (<MODE>mode)
-   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "%vcvtt<ssemodesuffix>2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<MODE>")
+   (set (attr "prefix_rex")
+       (if_then_else
+         (match_test "<SWI48:MODE>mode == DImode")
+         (const_string "1")
+         (const_string "*")))
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set_attr "bdver1_decode" "double,double")])
 
-;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
-(define_peephole2
-  [(set (match_operand:MODEF 0 "register_operand")
-       (match_operand:MODEF 1 "memory_operand"))
-   (set (match_operand:SWI48x 2 "register_operand")
-       (fix:SWI48x (match_dup 0)))]
-  "TARGET_SHORTEN_X87_SSE
-   && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ())
-   && peep2_reg_dead_p (2, operands[0])"
-  [(set (match_dup 2) (fix:SWI48x (match_dup 1)))])
-
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
-  [(match_scratch:DF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-       (fix:SWI48x (match_operand:DF 1 "memory_operand")))]
-  "TARGET_SSE2 && TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
-
-(define_peephole2
-  [(match_scratch:SF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-       (fix:SWI48x (match_operand:SF 1 "memory_operand")))]
-  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+  [(match_scratch:MODEF 2 "x")
+   (set (match_operand:SWI48 0 "register_operand")
+       (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
+  "TARGET_AVOID_VECTOR_DECODE
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
+   && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
+   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
 
 (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
   [(set (match_operand:SWI248x 0 "nonimmediate_operand")
index 6f19c183eae82c5ad991c2a4713716449cbcd760..9df7b97a34e0f8874ca7123deb3dbfd1aa334b77 100644 (file)
@@ -1942,6 +1942,19 @@ process_alt_operands (int only_alternative)
              if (no_regs_p && REG_P (op))
                reject++;
 
+#ifdef SECONDARY_MEMORY_NEEDED
+             /* If reload requires moving value through secondary
+                memory, it will need one more insn at least.  */
+             if (this_alternative != NO_REGS 
+                 && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
+                 && ((curr_static_id->operand[nop].type != OP_OUT
+                      && SECONDARY_MEMORY_NEEDED (cl, this_alternative,
+                                                  GET_MODE (op)))
+                     || (curr_static_id->operand[nop].type != OP_IN
+                         && SECONDARY_MEMORY_NEEDED (this_alternative, cl,
+                                                     GET_MODE (op)))))
+               losers++;
+#endif
              /* Input reloads can be inherited more often than output
                 reloads can be removed, so penalize output
                 reloads.  */
index cb6518751f62a04d2478e60c5158196acab36e15..0cff70940d632975589b68e2ca2a5e5a3ebca4d5 100644 (file)
@@ -1,3 +1,8 @@
+2012-11-20  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/19398
+       * gcc.target/i386/pr19398.c: New test.
+
 2012-11-20  Martin Jambor  <mjambor@suse.cz>
 
        PR tree-optimization/55260
diff --git a/gcc/testsuite/gcc.target/i386/pr19398.c b/gcc/testsuite/gcc.target/i386/pr19398.c
new file mode 100644 (file)
index 0000000..60931c0
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -msse -mno-sse3 -mfpmath=387" } */
+
+int test (float a)
+{
+  return (a * a);
+}
+
+/* { dg-final { scan-assembler-not "cvttss2si\[^\\n\]*%xmm" } } */