re PR target/32661 (__builtin_ia32_vec_ext suboptimal for pointer/ref args)
author Uros Bizjak <uros@gcc.gnu.org>
Tue, 28 Aug 2007 09:52:06 +0000 (11:52 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Tue, 28 Aug 2007 09:52:06 +0000 (11:52 +0200)
PR target/32661
* simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
operand) when top VEC_SELECT extracts scalar element.
* config/i386/sse.md (*vec_ext_v4si_mem): New.
(*vec_extract_v4sf_mem): Ditto.

testsuite/ChangeLog:

PR target/32661
* gcc.target/i386/pr32661.c: New test.

From-SVN: r127857

gcc/ChangeLog
gcc/config/i386/sse.md
gcc/simplify-rtx.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr32661.c [new file with mode: 0644]

index bc2e9bef119d2b9322142c4cce0539bb1da34963..3d9cdbb78cad3d6b11ee926f08a9f75045e77efa 100644 (file)
@@ -1,3 +1,12 @@
+2007-08-28  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/32661
+       * simplify-rtx.c (simplify_binary_operation_1) [VEC_SELECT]:
+       Simplify nested VEC_SELECT (with optional VEC_CONCAT operator as
+       operand) when top VEC_SELECT extracts scalar element.
+       * config/i386/sse.md (*vec_ext_v4si_mem): New pattern.
+       (*vec_extract_v4sf_mem): Ditto.
+
 2007-08-28  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/32370
 
 2007-08-23  Brian Sidebotham  <brian.sidebotham@gmail.com>
 
-       * configure.ac (leb128): Modify sed statement to work with any binutils
-       version string.
+       * configure.ac (leb128): Modify sed statement to work with any
+       binutils version string.
        * configure: Regenerate
 
 2007-08-23  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
index 07969375f1d1e1dbb4ed9fe46d181cbc4006580c..6779e9a805a3980eebd977fae092079d14625222 100644 (file)
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V4SF")])
 
+;; Extract one SFmode element from a V4SF operand that is in memory.
+;; The insn is output as "#" and, once reload has completed, is split
+;; into a scalar move from the selected element's address.
+(define_insn_and_split "*vec_extract_v4sf_mem"
+  [(set (match_operand:SF 0 "register_operand" "=x*rf")
+       (vec_select:SF
+        (match_operand:V4SF 1 "memory_operand" "o")
+        (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  /* Element N of the vector is addressed at byte offset N*4.  */
+  int byte_offset = INTVAL (operands[2]) * 4;
+  rtx elt_mem = adjust_address (operands[1], SFmode, byte_offset);
+
+  emit_move_insn (operands[0], elt_mem);
+  DONE;
+})
+
 (define_expand "vec_extractv4sf"
   [(match_operand:SF 0 "register_operand" "")
    (match_operand:V4SF 1 "register_operand" "")
   operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
 })
 
+;; Extract one SImode element from a V4SI operand that is in memory.
+;; The insn is output as "#" and, once reload has completed, is split
+;; into a scalar move from the selected element's address.
+(define_insn_and_split "*vec_ext_v4si_mem"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (vec_select:SI
+         (match_operand:V4SI 1 "memory_operand" "o")
+         (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  /* Element N of the vector is addressed at byte offset N*4.  */
+  int byte_offset = INTVAL (operands[2]) * 4;
+  rtx elt_mem = adjust_address (operands[1], SImode, byte_offset);
+
+  emit_move_insn (operands[0], elt_mem);
+  DONE;
+})
+
 (define_expand "sse_storeq"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
index 97c4d9318050e5e0e9464397379748e89de76356..3271a86485172a66b18845c616f5351f389fd1b4 100644 (file)
@@ -2659,6 +2659,85 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
          if (GET_CODE (trueop0) == CONST_VECTOR)
            return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
                                                      (trueop1, 0, 0)));
+
+         /* Extract a scalar element from a nested VEC_SELECT expression (with
+            an optional VEC_CONCAT inside it).  Some targets (i386) extract a
+            scalar element from a vector using a chain of nested VEC_SELECTs.
+            When the input is a memory operand, the result can then become a
+            simple scalar load from an offset memory address.  */
+         if (GET_CODE (trueop0) == VEC_SELECT)
+           {
+             rtx op0 = XEXP (trueop0, 0);
+             rtx op1 = XEXP (trueop0, 1);
+
+             enum machine_mode opmode = GET_MODE (op0);
+             int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
+             int n_elts = GET_MODE_SIZE (opmode) / elt_size;
+
+             int i = INTVAL (XVECEXP (trueop1, 0, 0));
+             int elem;
+
+             rtvec vec;
+             rtx tmp_op, tmp;
+
+             gcc_assert (GET_CODE (op1) == PARALLEL);
+             /* I indexes OP1, so OP1's length is its bound.  */
+             gcc_assert (i < XVECLEN (op1, 0));
+
+             /* Element of OP0 that the nested selector picks at index I.  */
+             elem = INTVAL (XVECEXP (op1, 0, i));
+
+             /* Handle the case when nested VEC_SELECT wraps VEC_CONCAT.  */
+             if (GET_CODE (op0) == VEC_CONCAT)
+               {
+                 rtx op00 = XEXP (op0, 0);
+                 rtx op01 = XEXP (op0, 1);
+
+                 enum machine_mode mode00, mode01;
+                 int n_elts00, n_elts01;
+
+                 mode00 = GET_MODE (op00);
+                 mode01 = GET_MODE (op01);
+
+                 /* Find out number of elements of each operand.  */
+                 if (VECTOR_MODE_P (mode00))
+                   {
+                     elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00));
+                     n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
+                   }
+                 else
+                   n_elts00 = 1;
+
+                 if (VECTOR_MODE_P (mode01))
+                   {
+                     elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01));
+                     n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
+                   }
+                 else
+                   n_elts01 = 1;
+
+                 gcc_assert (n_elts == n_elts00 + n_elts01);
+
+                 /* Select correct operand of VEC_CONCAT and adjust selector:
+                    elements [0, n_elts00) come from the first operand.  */
+                 if (elem < n_elts00)
+                   tmp_op = op00;
+                 else
+                   {
+                     tmp_op = op01;
+                     elem -= n_elts00;
+                   }
+               }
+             else
+               tmp_op = op0;
+
+             vec = rtvec_alloc (1);
+             RTVEC_ELT (vec, 0) = GEN_INT (elem);
+
+             tmp = gen_rtx_fmt_ee (code, mode,
+                                   tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
+             return tmp;
+           }
        }
       else
        {
index 47ee5d715d538b4cd87ea2cd478b2eccf3a258ba..f3a78804c85ddad6b53f766bc00ea24ed2fccb67 100644 (file)
@@ -1,3 +1,8 @@
+2007-08-28  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/32661
+       * gcc.target/i386/pr32661.c: New test.
+
 2007-08-28  Jakub Jelinek  <jakub@redhat.com>
 
        PR middle-end/32370
diff --git a/gcc/testsuite/gcc.target/i386/pr32661.c b/gcc/testsuite/gcc.target/i386/pr32661.c
new file mode 100644 (file)
index 0000000..247ae13
--- /dev/null
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+/* PR target/32661: each function below extracts one element from a vector
+   that is read through a pointer.  The dg-final directives check that the
+   shuffle/unpack instruction named in each of them is absent from the
+   generated assembly.  */
+
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Return element 1 of the V4SI vector pointed to by VAL.  */
+int fooSI_1(__v4si *val)
+{
+  return __builtin_ia32_vec_ext_v4si(*val, 1);
+}
+/* { dg-final { scan-assembler-not "pshufd" } } */
+
+/* Return element 2 of the V4SI vector pointed to by VAL.  */
+int fooSI_2(__v4si *val)
+{
+  return __builtin_ia32_vec_ext_v4si(*val, 2);
+}
+/* { dg-final { scan-assembler-not "punpckhdq" } } */
+
+/* Return element 2 of the V4SF vector pointed to by VAL.  */
+float fooSF_2(__v4sf *val)
+{
+  return __builtin_ia32_vec_ext_v4sf(*val, 2);
+}
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+
+/* Return element 3 of the V4SF vector pointed to by VAL.  */
+float fooSF_3(__v4sf *val)
+{
+  return __builtin_ia32_vec_ext_v4sf(*val, 3);
+}
+/* { dg-final { scan-assembler-not "shufps" } } */