Add vector_memory_operand and "Bm" constraint
author: H.J. Lu <hongjiu.lu@intel.com>
Tue, 5 Jan 2016 20:17:26 +0000 (20:17 +0000)
committer: H.J. Lu <hjl@gcc.gnu.org>
Tue, 5 Jan 2016 20:17:26 +0000 (12:17 -0800)
SSE vector arithmetic and logic instructions accept only aligned memory
operands; only with AVX may the memory operand be misaligned.  This patch
adds the vector_memory_operand predicate and the "Bm" constraint for
aligned SSE memory operands, and applies them to the SSE plusminus and
any_logic patterns.

gcc/

PR target/68991
* config/i386/constraints.md (Bm): New constraint.
* config/i386/predicates.md (vector_memory_operand): New
predicate.
* config/i386/sse.md: Replace xm with xBm in plusminus and
any_logic patterns.

gcc/testsuite/

PR target/68991
* g++.dg/pr68991-1.C: New test.
* g++.dg/pr68991-2.C: Likewise.

From-SVN: r232087

gcc/ChangeLog
gcc/config/i386/constraints.md
gcc/config/i386/predicates.md
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/g++.dg/pr68991-1.C [new file with mode: 0644]
gcc/testsuite/g++.dg/pr68991-2.C [new file with mode: 0644]

index 2a4a71fb52bb74173c2c4444ef994d96d7850796..264a7b6a4da6f5676d30f367b38a367fc1788820 100644 (file)
@@ -1,3 +1,12 @@
+2016-01-05  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/68991
+       * config/i386/constraints.md (Bm): New constraint.
+       * config/i386/predicates.md (vector_memory_operand): New
+       predicate.
+       * config/i386/sse.md: Replace xm with xBm in plusminus and
+       any_logic patterns.
+
 2016-01-05  Sandra Loosemore <sandra@codesourcery.com>
 
        PR 1078
index b3db1333308311eca46c3d6c1efdad90cbd60a18..bac9d6668a8d09e25e3e715e82bd20e75fd07212 100644 (file)
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
+;;  m  Vector memory operand
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.
   "@internal GOT memory operand."
   (match_operand 0 "GOT_memory_operand"))
 
+(define_constraint "Bm"
+  "@internal Vector memory operand."
+  (match_operand 0 "vector_memory_operand"))
+
 (define_constraint "Bs"
   "@internal Sibcall memory operand."
   (ior (and (not (match_test "TARGET_X32"))
index 6f0159b65dc44053454ce02d05a0fb87083a8de6..33b35945092ea8ce030af5166bc859f2b2d63d73 100644 (file)
        (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
        (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
+; Return true when OP is a memory operand acceptable to vector insns:
+; any alignment with AVX, else at least GET_MODE_ALIGNMENT (mode).
+(define_predicate "vector_memory_operand"
+  (and (match_operand 0 "memory_operand")
+       (ior (match_test "TARGET_AVX")
+           (match_test "MEM_ALIGN (op) >= GET_MODE_ALIGNMENT (mode)"))))
+
 ; Return true when OP is operand acceptable for standard SSE move.
 (define_predicate "vector_move_operand"
   (ior (match_operand 0 "nonimmediate_operand")
index c3cd19932b219c39b0d4c815f7526e13eb1b8d55..ca3a831184fd6e5c34e4d33624523bac7efbfc62 100644 (file)
   [(set (match_operand:VF 0 "register_operand" "=x,v")
        (plusminus:VF
          (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
-         (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
+         (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
   "@
    <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
        (vec_merge:VF_128
          (plusminus:VF_128
            (match_operand:VF_128 1 "register_operand" "0,v")
-           (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
+           (match_operand:VF_128 2 "nonimmediate_operand" "xBm,<round_constraint>"))
          (match_dup 1)
          (const_int 1)))]
   "TARGET_SSE"
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
-               (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
+               (match_operand:V4SF 2 "nonimmediate_operand" "xBm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SF
   [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
-         (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
+         (match_operand:VF_128_256 2 "nonimmediate_operand" "xBm,vm")))]
   "TARGET_SSE && <mask_avx512vl_condition>
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
   [(set (match_operand:TF 0 "register_operand" "=x,x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0,x")
-         (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
+         (match_operand:TF 2 "nonimmediate_operand" "xBm,xm")))]
   "TARGET_SSE
    && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
 {
   [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
-         (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+         (match_operand:VI_AVX2 2 "nonimmediate_operand" "xBm,vm")))]
   "TARGET_SSE2
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
   [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
-         (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+         (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xBm,vm")))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
   [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
        (any_logic:VI48_AVX_AVX512F
          (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
-         (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+         (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xBm,vm")))]
   "TARGET_SSE && <mask_mode512bit_condition>
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
   [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
        (any_logic: VI12_AVX_AVX512F
          (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
-         (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+         (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xBm,vm")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
   static char buf[64];
index 2075964cab1513b08a72df5a29ea50a0e62cf393..64e13d66318c44eb9f87a52a70cb3a94520dde7e 100644 (file)
@@ -1,3 +1,9 @@
+2016-01-05  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/68991
+       * g++.dg/pr68991-1.C: New test.
+       * g++.dg/pr68991-2.C: Likewise.
+
 2016-01-05  Sergei Trofimovich <siarheit@google.com>
 
         PR other/60465
diff --git a/gcc/testsuite/g++.dg/pr68991-1.C b/gcc/testsuite/g++.dg/pr68991-1.C
new file mode 100644 (file)
index 0000000..744d13c
--- /dev/null
@@ -0,0 +1,191 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-std=c++11 -O3 -msse2 -mno-avx -fno-exceptions -fno-rtti -fdump-rtl-final" }
+
+typedef unsigned int size_type;
+
+#define _GLIBCXX_BITSET_BITS_PER_WORD  (__CHAR_BIT__ * __SIZEOF_INT__)
+#define _GLIBCXX_BITSET_WORDS(__n) \
+  ((__n) / _GLIBCXX_BITSET_BITS_PER_WORD + \
+   ((__n) % _GLIBCXX_BITSET_BITS_PER_WORD == 0 ? 0 : 1))
+
+namespace std
+{
+  template<size_type _Nw>
+    struct _Base_bitset
+    {
+      typedef unsigned int _WordT;
+      _WordT           _M_w[_Nw];
+
+      _WordT&
+      _M_hiword()
+      { return _M_w[_Nw - 1]; }
+
+      void
+      _M_do_and(const _Base_bitset<_Nw>& __x)
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         _M_w[__i] &= __x._M_w[__i];
+      }
+
+      void
+      _M_do_flip()
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         _M_w[__i] = ~_M_w[__i];
+      }
+
+      bool
+      _M_is_equal(const _Base_bitset<_Nw>& __x) const
+      {
+       for (size_type __i = 0; __i < _Nw; ++__i)
+         if (_M_w[__i] != __x._M_w[__i])
+           return false;
+       return true;
+      }
+
+      bool
+      _M_is_any() const
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         if (_M_w[__i] != static_cast<_WordT>(0))
+           return true;
+       return false;
+      }
+    };
+
+  template<size_type _Extrabits>
+    struct _Sanitize
+    {
+      typedef unsigned int _WordT;
+
+      static void
+      _S_do_sanitize(_WordT& __val)
+      { __val &= ~((~static_cast<_WordT>(0)) << _Extrabits); }
+    };
+
+  template<size_type _Nb>
+    class bitset
+    : private _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)>
+    {
+    private:
+      typedef _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)> _Base;
+      typedef unsigned int _WordT;
+
+      void
+      _M_do_sanitize()
+      {
+       typedef _Sanitize<_Nb % _GLIBCXX_BITSET_BITS_PER_WORD> __sanitize_type;
+       __sanitize_type::_S_do_sanitize(this->_M_hiword());
+      }
+
+    public:
+      class reference
+      {
+       friend class bitset;
+
+       _WordT* _M_wp;
+       size_type       _M_bpos;
+
+      public:
+       reference&
+       flip()
+       {
+         *_M_wp ^= _Base::_S_maskbit(_M_bpos);
+         return *this;
+       }
+      };
+
+      bitset<_Nb>&
+      operator&=(const bitset<_Nb>& __rhs)
+      {
+       this->_M_do_and(__rhs);
+       return *this;
+      }
+
+      bitset<_Nb>&
+      flip()
+      {
+       this->_M_do_flip();
+       this->_M_do_sanitize();
+       return *this;
+      }
+
+      bitset<_Nb>
+      operator~() const
+      { return bitset<_Nb>(*this).flip(); }
+
+      bool
+      operator==(const bitset<_Nb>& __rhs) const
+      { return this->_M_is_equal(__rhs); }
+
+      bool
+      any() const
+      { return this->_M_is_any(); }
+    };
+
+  template<size_type _Nb>
+    inline bitset<_Nb>
+    operator&(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
+    {
+      bitset<_Nb> __result(__x);
+      __result &= __y;
+      return __result;
+    }
+}
+template<typename T>
+class ArrayRef {
+public:
+    typedef const T *iterator;
+
+private:
+    const T *Data;
+    size_type Length;
+
+public:
+    iterator begin() const { return Data; }
+    iterator end() const { return Data + Length; }
+};
+
+const unsigned MAX_SUBTARGET_FEATURES = 128;
+class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
+};
+
+struct SubtargetFeatureKV {
+  FeatureBitset Value;
+  FeatureBitset Implies;
+};
+
+struct SubtargetInfoKV {
+  const void *Value;
+};
+class SubtargetFeatures {
+public:
+    FeatureBitset ToggleFeature(FeatureBitset Bits,
+                               const SubtargetFeatureKV *,
+                               ArrayRef<SubtargetFeatureKV> FeatureTable);
+};
+
+static
+void ClearImpliedBits(FeatureBitset &Bits,
+                     const SubtargetFeatureKV *FeatureEntry,
+                     ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  for (auto &FE : FeatureTable) {
+    if ((FE.Implies & FeatureEntry->Value).any()) {
+      Bits &= ~FE.Value;
+      ClearImpliedBits(Bits, &FE, FeatureTable);
+    }
+  }
+}
+
+FeatureBitset
+SubtargetFeatures::ToggleFeature(FeatureBitset Bits,
+                                const SubtargetFeatureKV *FeatureEntry,
+                                ArrayRef<SubtargetFeatureKV> FeatureTable) {
+    if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+      Bits &= ~FeatureEntry->Value;
+      ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+    }
+  return Bits;
+}
+
+// { dg-final { scan-rtl-dump-not "S16 A32\[^\n\]*\\\*xorv4si3" "final" } }
diff --git a/gcc/testsuite/g++.dg/pr68991-2.C b/gcc/testsuite/g++.dg/pr68991-2.C
new file mode 100644 (file)
index 0000000..a3c59f0
--- /dev/null
@@ -0,0 +1,191 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-std=c++11 -O3 -msse2 -mno-avx -fno-exceptions -fno-rtti -fdump-rtl-final" }
+
+typedef unsigned int size_type;
+
+#define _GLIBCXX_BITSET_BITS_PER_WORD  (__CHAR_BIT__ * __SIZEOF_INT__)
+#define _GLIBCXX_BITSET_WORDS(__n) \
+  ((__n) / _GLIBCXX_BITSET_BITS_PER_WORD + \
+   ((__n) % _GLIBCXX_BITSET_BITS_PER_WORD == 0 ? 0 : 1))
+
+namespace std
+{
+  template<size_type _Nw>
+    struct _Base_bitset
+    {
+      typedef unsigned int _WordT;
+      _WordT           _M_w[_Nw];
+
+      _WordT&
+      _M_hiword()
+      { return _M_w[_Nw - 1]; }
+
+      void
+      _M_do_and(const _Base_bitset<_Nw>& __x)
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         _M_w[__i] += __x._M_w[__i];
+      }
+
+      void
+      _M_do_flip()
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         _M_w[__i] = ~_M_w[__i];
+      }
+
+      bool
+      _M_is_equal(const _Base_bitset<_Nw>& __x) const
+      {
+       for (size_type __i = 0; __i < _Nw; ++__i)
+         if (_M_w[__i] != __x._M_w[__i])
+           return false;
+       return true;
+      }
+
+      bool
+      _M_is_any() const
+      {
+       for (size_type __i = 0; __i < _Nw; __i++)
+         if (_M_w[__i] != static_cast<_WordT>(0))
+           return true;
+       return false;
+      }
+    };
+
+  template<size_type _Extrabits>
+    struct _Sanitize
+    {
+      typedef unsigned int _WordT;
+
+      static void
+      _S_do_sanitize(_WordT& __val)
+      { __val &= ~((~static_cast<_WordT>(0)) << _Extrabits); }
+    };
+
+  template<size_type _Nb>
+    class bitset
+    : private _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)>
+    {
+    private:
+      typedef _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)> _Base;
+      typedef unsigned int _WordT;
+
+      void
+      _M_do_sanitize()
+      {
+       typedef _Sanitize<_Nb % _GLIBCXX_BITSET_BITS_PER_WORD> __sanitize_type;
+       __sanitize_type::_S_do_sanitize(this->_M_hiword());
+      }
+
+    public:
+      class reference
+      {
+       friend class bitset;
+
+       _WordT* _M_wp;
+       size_type       _M_bpos;
+
+      public:
+       reference&
+       flip()
+       {
+         *_M_wp ^= _Base::_S_maskbit(_M_bpos);
+         return *this;
+       }
+      };
+
+      bitset<_Nb>&
+      operator&=(const bitset<_Nb>& __rhs)
+      {
+       this->_M_do_and(__rhs);
+       return *this;
+      }
+
+      bitset<_Nb>&
+      flip()
+      {
+       this->_M_do_flip();
+       this->_M_do_sanitize();
+       return *this;
+      }
+
+      bitset<_Nb>
+      operator~() const
+      { return bitset<_Nb>(*this).flip(); }
+
+      bool
+      operator==(const bitset<_Nb>& __rhs) const
+      { return this->_M_is_equal(__rhs); }
+
+      bool
+      any() const
+      { return this->_M_is_any(); }
+    };
+
+  template<size_type _Nb>
+    inline bitset<_Nb>
+    operator&(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
+    {
+      bitset<_Nb> __result(__x);
+      __result &= __y;
+      return __result;
+    }
+}
+template<typename T>
+class ArrayRef {
+public:
+    typedef const T *iterator;
+
+private:
+    const T *Data;
+    size_type Length;
+
+public:
+    iterator begin() const { return Data; }
+    iterator end() const { return Data + Length; }
+};
+
+const unsigned MAX_SUBTARGET_FEATURES = 128;
+class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
+};
+
+struct SubtargetFeatureKV {
+  FeatureBitset Value;
+  FeatureBitset Implies;
+};
+
+struct SubtargetInfoKV {
+  const void *Value;
+};
+class SubtargetFeatures {
+public:
+    FeatureBitset ToggleFeature(FeatureBitset Bits,
+                               const SubtargetFeatureKV *,
+                               ArrayRef<SubtargetFeatureKV> FeatureTable);
+};
+
+static
+void ClearImpliedBits(FeatureBitset &Bits,
+                     const SubtargetFeatureKV *FeatureEntry,
+                     ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  for (auto &FE : FeatureTable) {
+    if ((FE.Implies & FeatureEntry->Value).any()) {
+      Bits &= ~FE.Value;
+      ClearImpliedBits(Bits, &FE, FeatureTable);
+    }
+  }
+}
+
+FeatureBitset
+SubtargetFeatures::ToggleFeature(FeatureBitset Bits,
+                                const SubtargetFeatureKV *FeatureEntry,
+                                ArrayRef<SubtargetFeatureKV> FeatureTable) {
+    if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+      Bits &= ~FeatureEntry->Value;
+      ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+    }
+  return Bits;
+}
+
+// { dg-final { scan-rtl-dump-not "S16 A32\[^\n\]*\\\*addv4si3" "final" } }