Merge pull request #2633 from whitequark/cxxrtl-no-top

[yosys.git] / backends / cxxrtl / cxxrtl.h
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h

index 59393e415cf125a76eb4e0e860aa95339b39709e..0e55c46c223986aea5bdf57009f9c671bd9ddebd 100644 (file)
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -41,18 +41,29 @@
  
  #include <backends/cxxrtl/cxxrtl_capi.h>
  
+#ifndef __has_attribute
+#      define __has_attribute(x) 0 // no __has_attribute support: report every attribute as unavailable
+#endif
+
  // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
  // It generates a lot of specialized template functions with relatively large bodies that, when inlined
  // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
  // Because of this, most of the CXXRTL runtime must be always inlined for best performance.
-#ifndef __has_attribute
-#      define __has_attribute(x) 0
-#endif
  #if __has_attribute(always_inline)
  #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
  #else
  #define CXXRTL_ALWAYS_INLINE inline
  #endif
+// Conversely, some functions in the generated code are extremely large yet very cold; compilers can spend
+// pathological amounts of time trying to optimize them, and the resulting code only becomes worse. This macro
+// disables optimization for a function via `optnone` or, failing that, `optimize(0)`; otherwise it is empty.
+#if __has_attribute(optnone)
+#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
+#elif __has_attribute(optimize)
+#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
+#else
+#define CXXRTL_EXTREMELY_COLD
+#endif
  
  // CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
  // of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
@@ -306,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
                 return sext_cast<NewBits>()(*this);
         }
  
+       // Replicates a 1-bit value `Count` times. Bit replication is far more efficient than the equivalent concatenation.
+       template<size_t Count>
+       CXXRTL_ALWAYS_INLINE
+       value<Bits * Count> repeat() const {
+               static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
+               return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
+       }
+
         // Operations with run-time parameters (offsets, amounts, etc).
         //
         // These operations are used for computations.
@@ -1198,49 +1217,49 @@ value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template zcast<BitsY>().template shl(b);
+       return a.template zcast<BitsY>().shl(b); // zcast `a` to BitsY before shifting, not after
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template scast<BitsY>().template shl(b);
+       return a.template scast<BitsY>().shl(b); // scast `a` to BitsY before shifting, not after
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template zcast<BitsY>().template shl(b);
+       return a.template zcast<BitsY>().shl(b); // same expression as shl_uu: zcast to BitsY, then shl()
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template scast<BitsY>().template shl(b);
+       return a.template scast<BitsY>().shl(b); // same expression as shl_su: scast to BitsY, then shl()
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template shr(b).template zcast<BitsY>();
+       return a.shr(b).template zcast<BitsY>(); // shift `a` first, then zcast the shifted result to BitsY
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template shr(b).template scast<BitsY>();
+       return a.shr(b).template scast<BitsY>(); // shift `a` with shr() first, then scast the result to BitsY
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template shr(b).template zcast<BitsY>();
+       return a.shr(b).template zcast<BitsY>(); // uses shr(), not sshr() (presumably because `a` is unsigned), then zcast
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>
  CXXRTL_ALWAYS_INLINE
  value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
-       return a.template sshr(b).template scast<BitsY>();
+       return a.sshr(b).template scast<BitsY>(); // the only variant here that calls sshr(); result is then scast to BitsY
  }
  
  template<size_t BitsY, size_t BitsA, size_t BitsB>