i386.h (SSE_VEC_FLOAT_MODE_P): Remove.
author Uros Bizjak <uros@gcc.gnu.org>
Mon, 18 Apr 2011 19:48:09 +0000 (21:48 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Mon, 18 Apr 2011 19:48:09 +0000 (21:48 +0200)
* config/i386/i386.h (SSE_VEC_FLOAT_MODE_P): Remove.
(AVX_FLOAT_MODE_P): Ditto.
(AVX128_VEC_FLOAT_MODE_P): Ditto.
(AVX256_VEC_FLOAT_MODE_P): Ditto.
(AVX_VEC_FLOAT_MODE_P): Ditto.
* config/i386/i386.md (UNSPEC_MASKLOAD): Remove.
(UNSPEC_MASKSTORE): Ditto.
* config/i386/sse.md (<sse>_movmsk<ssemodesuffix><avxmodesuffix>):
Merge from <sse>_movmsk<ssemodesuffix> and
avx_movmsk<ssemodesuffix>256.  Use VF mode iterator.
(*sse2_maskmovdqu): Merge with *sse2_maskmovdqu_rex64.  Use P mode
iterator.
(avx_maskload<ssemodesuffix><avxmodesuffix>): New expander.
(avx_maskstore<ssemodesuffix><avxmodesuffix>): Ditto.
(*avx_maskmov<ssemodesuffix><avxmodesuffix>): New insn.

testsuite/ChangeLog:

* gcc.target/i386/sse2-maskmovdqu.c: New test.
* gcc.target/i386/avx-vmaskmovdqu.c: Ditto.

From-SVN: r172669

gcc/ChangeLog
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c [new file with mode: 0644]

index 718056a85f94963f2e350bd79bd6b503bee9430c..9a56f9a1ce5425f3fa4a4898ee72f494641ac53e 100644 (file)
@@ -1,3 +1,21 @@
+2011-04-18  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.h (SSE_VEC_FLOAT_MODE_P): Remove.
+       (AVX_FLOAT_MODE_P): Ditto.
+       (AVX128_VEC_FLOAT_MODE_P): Ditto.
+       (AVX256_VEC_FLOAT_MODE_P): Ditto.
+       (AVX_VEC_FLOAT_MODE_P): Ditto.
+       * config/i386/i386.md (UNSPEC_MASKLOAD): Remove.
+       (UNSPEC_MASKSTORE): Ditto.
+       * config/i386/sse.md (<sse>_movmsk<ssemodesuffix><avxmodesuffix>):
+       Merge from <sse>_movmsk<ssemodesuffix> and
+       avx_movmsk<ssemodesuffix>256.  Use VF mode iterator.
+       (*sse2_maskmovdqu): Merge with *sse2_maskmovdqu_rex64.  Use P mode
+       iterator.
+       (avx_maskload<ssemodesuffix><avxmodesuffix>): New expander.
+       (avx_maskstore<ssemodesuffix><avxmodesuffix>): Ditto.
+       (*avx_maskmov<ssemodesuffix><avxmodesuffix>): New insn.
+
 2011-04-18  Jan Hubicka  <jh@suse.cz>
 
        * ipa-inline.c (inline_small_functions): Fix pasto in previous patch.
@@ -10,8 +28,7 @@
        (inline_small_functions): Move program size estimates here;
        actually process whole queue even when unit growth has been
        met. (to properly compute inline_failed reasons and for the
-       case unit size decrease.) Revisit comments on recursive
-       inlining.
+       case unit size decrease.) Revisit comments on recursive inlining.
        (ipa_inline): Remove unit summary code; first inline hot calls
        of functions called once, cold calls next.
        (order, nnodes): Remove unused variables.
        want_inline_small_function_p.
        (cgraph_decide_recursive_inlining): Rename to...
        (recursive_inlining): Use can_inline_edge_p and
-       want_inline_self_recursive_call_p; simplify and
-       remove no longer valid FIXME.
+       want_inline_self_recursive_call_p; simplify and remove no longer
+       valid FIXME.
        (cgraph_set_inline_failed): Remove.
        (add_new_edges_to_heap): Use can_inline_edge_p and
        want_inline_small_function_p.
        (cgraph_decide_inlining_of_small_functions): Rename to ...
        (inline_small_functions): ... this one; cleanup; use
-       can/want predicates; cleanup debug ouput; work edges
-       till fibheap is exhausted and do not stop once unit
-       growth is reached; remove later loop processing remaining
-       edges.
+       can/want predicates; cleanup debug ouput; work edges till fibheap
+       is exhausted and do not stop once unit growth is reached; remove
+       later loop processing remaining edges.
        (cgraph_flatten): Rename to ...
        (flatten_function): ... this one; use can_inline_edge_p
        and can_early_inline_edge_p predicates.
        inlining functions called once; simplify the pass.
        (cgraph_perform_always_inlining): Rename to ...
        (inline_always_inline_functions): ... this one; use
-       DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p
-       predicate
+       DECL_DISREGARD_INLINE_LIMITS; use can_inline_edge_p predicate.
        (cgraph_decide_inlining_incrementally): Rename to ...
        (early_inline_small_functions): ... this one; simplify
        using new predicates; cleanup; make dumps prettier.
        (initialize_inline_failed): Move here from cgraph.c.
        * tree-sra.c: Include ipa-inline.h.
        (ipa_sra_preliminary_function_checks): Update.
-       * Makefile.in: (cgraph.o, cgraphbuild.o): Add dependency on
-       ipa-inline.h
+       * Makefile.in (cgraph.o, cgraphbuild.o): Add dependency on
+       ipa-inline.h.
 
 2011-04-16  Uros Bizjak  <ubizjak@gmail.com>
 
            Eric Weddington  <eric.weddington@atmel.com>
            Georg-Johann Lay <avr@gjlay.de>
 
-       * config/avr/avr.c: ("insn-codes.h", "optabs.h", "langhooks.h"):
+       * config/avr/avr.c ("insn-codes.h", "optabs.h", "langhooks.h"):
        New Includes
        (avr_init_builtins, avr_expand_builtin,
        avr_expand_delay_cycles, avr_expand_unop_builtin,
 
        2010-08-13  Vladimir Makarov  <vmakarov@redhat.com>
 
-       * ira-build.c: (ira_create_object): Remove initialization of
+       * ira-build.c (ira_create_object): Remove initialization of
        OBJECT_PROFITABLE_HARD_REGS.  Initialize OBJECT_ADD_DATA.
        (ira_create_allocno): Remove initialization of
        ALLOCNO_MEM_OPTIMIZED_DEST, ALLOCNO_MEM_OPTIMIZED_DEST_P,
        ira_object_conflict_iter_cond.
        (FOR_EACH_OBJECT_CONFLICT): Don't use ira_object_conflict_iter_next.
 
-       * ira-live.c: (process_single_reg_class_operands): Call
+       * ira-live.c (process_single_reg_class_operands): Call
        ira_init_register_move_cost_if_necessary.  Use
        ira_register_move_cost instead of ira_get_register_move_cost.
 
 
 2011-03-14  Andreas Tobler  <andreast@fgznet.ch>
 
-       * config/rs6000/freebsd.h: (RELOCATABLE_NEEDS_FIXUP): Define in
+       * config/rs6000/freebsd.h (RELOCATABLE_NEEDS_FIXUP): Define in
        terms of target_flags_explicit. Adjust copyright year.
 
        * config.gcc: Add FreeBSD PowerPC soft-float libgcc bits.
 
 2011-01-07  Jan Hubicka  <jh@suse.cz>
 
-       * doc/invoke.texi: (-flto, -fuse-linker-plugin): Update defaults
+       * doc/invoke.texi (-flto, -fuse-linker-plugin): Update defaults
        and no longer claim that gold is required for linker plugin.
        * configure: Regenerate.
        * gcc.c (PLUGIN_COND): New macro.
index 511429a727a4cfe7f46f20f7095136c43990b3d8..10fc1260b58be35a0fddbc89db7ab04d658a8933 100644 (file)
@@ -1328,22 +1328,6 @@ enum reg_class
 #define SSE_FLOAT_MODE_P(MODE) \
   ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
 
-#define SSE_VEC_FLOAT_MODE_P(MODE) \
-  ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
-
-#define AVX_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
-
-#define AVX128_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
-
-#define AVX256_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
-
-#define AVX_VEC_FLOAT_MODE_P(MODE) \
-  (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \
-                 || (MODE) == V8SFmode || (MODE) == V4DFmode))
-
 #define FMA4_VEC_FLOAT_MODE_P(MODE) \
   (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \
                  || (MODE) == V8SFmode || (MODE) == V4DFmode))
index f896bc9b9037719ea7100cccdd24b0a30677b7bc..0e0ceed4260b4391ec5e5543c8d22f4827abbc9e 100644 (file)
   UNSPEC_VPERMIL
   UNSPEC_VPERMIL2
   UNSPEC_VPERMIL2F128
-  UNSPEC_MASKLOAD
-  UNSPEC_MASKSTORE
   UNSPEC_CAST
   UNSPEC_VTESTP
   UNSPEC_VCVTPH2PS
index 928bf784031fcd2be1a743cac9ced7abe905dae6..63da5dfe955105f83144098f8de637dab4ba7353 100644 (file)
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_insn "avx_movmsk<ssemodesuffix>256"
+(define_insn "<sse>_movmsk<ssemodesuffix><avxmodesuffix>"
   [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
-         [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+         [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
-  "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "<sse>_movmsk<ssemodesuffix>"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI
-         [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
-         UNSPEC_MOVMSK))]
-  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  ""
   "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
   "TARGET_SSE2")
 
 (define_insn "*sse2_maskmovdqu"
-  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-                      (match_operand:V16QI 2 "register_operand" "x")
-                      (mem:V16QI (match_dup 0))]
-                     UNSPEC_MASKMOV))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "%vmaskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix_data16" "1")
-   ;; The implicit %rdi operand confuses default length_vex computation.
-   (set_attr "length_vex" "3")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse2_maskmovdqu_rex64"
-  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
+  "TARGET_SSE2"
   "%vmaskmovdqu\t{%2, %1|%1, %2}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix_data16" "1")
    ;; The implicit %rdi operand confuses default length_vex computation.
    (set (attr "length_vex")
-     (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
+     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "avx_maskload<ssemodesuffix><avxmodesuffix>"
-  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
-       (unspec:AVXMODEF2P
-         [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
-          (match_operand:<avxpermvecmode> 2 "register_operand" "x")
+(define_expand "avx_maskload<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "register_operand" "")
+       (unspec:VF
+         [(match_operand:<avxpermvecmode> 2 "register_operand" "")
+          (match_operand:VF 1 "memory_operand" "")
           (match_dup 0)]
-         UNSPEC_MASKLOAD))]
-  "TARGET_AVX"
-  "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
-  [(set_attr "type" "sselog1")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "<MODE>")])
+         UNSPEC_MASKMOV))]
+  "TARGET_AVX")
 
-(define_insn "avx_maskstore<ssemodesuffix><avxmodesuffix>"
-  [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
-       (unspec:AVXMODEF2P
-         [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
-          (match_operand:AVXMODEF2P 2 "register_operand" "x")
+(define_expand "avx_maskstore<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "memory_operand" "")
+       (unspec:VF
+         [(match_operand:<avxpermvecmode> 1 "register_operand" "")
+          (match_operand:VF 2 "register_operand" "")
           (match_dup 0)]
-         UNSPEC_MASKSTORE))]
-  "TARGET_AVX"
+         UNSPEC_MASKMOV))]
+  "TARGET_AVX")
+
+(define_insn "*avx_maskmov<ssemodesuffix><avxmodesuffix>"
+  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+       (unspec:VF
+         [(match_operand:<avxpermvecmode> 1 "register_operand" "x,x")
+          (match_operand:VF 2 "nonimmediate_operand" "m,x")
+          (match_dup 0)]
+         UNSPEC_MASKMOV))]
+  "TARGET_AVX
+   && (REG_P (operands[0]) == MEM_P (operands[2]))"
   "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
index 58bf81ac2a890f07de26412dc7628882f5a8deb6..5295ec5809f72a70dd8b695a7493608832575244 100644 (file)
@@ -1,3 +1,8 @@
+2011-04-18  Uros Bizjak  <ubizjak@gmail.com>
+
+       * gcc.target/i386/sse2-maskmovdqu.c: New test.
+       * gcc.target/i386/avx-vmaskmovdqu.c: Ditto.
+
 2011-04-18  Tobias Burnus  <burnus@net-b.de>
 
        PR fortran/18918
diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovdqu.c
new file mode 100644 (file)
index 0000000..24b5bba
--- /dev/null
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-maskmovdqu.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c b/gcc/testsuite/gcc.target/i386/sse2-maskmovdqu.c
new file mode 100644 (file)
index 0000000..b401c85
--- /dev/null
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -msse2" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+
+#ifndef MASK
+#define MASK 0x7986
+#endif
+
+#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 7)
+
+void static
+TEST (void)
+{
+  __m128i src, mask;
+  char s[16] = { 1,-2,3,-4,5,-6,7,-8,9,-10,11,-12,13,-14,15,-16 };
+  char m[16];
+
+  char u[20] = { 0 };
+  int i;
+
+  for (i = 0; i < 16; i++)
+    m[i] = mask_v (i);
+
+  src = _mm_loadu_si128 ((__m128i *)s);
+  mask = _mm_loadu_si128 ((__m128i *)m);
+
+  _mm_maskmoveu_si128 (src, mask, u+3);
+
+  for (i = 0; i < 16; i++)
+    if (u[i+3] != (m[i] ? s[i] : 0))
+      abort ();
+}