cxxrtl: speed up bit repeats (sign extends, etc).
author whitequark <whitequark@whitequark.org>
Mon, 21 Dec 2020 02:15:55 +0000 (02:15 +0000)
committer whitequark <whitequark@whitequark.org>
Mon, 21 Dec 2020 02:20:34 +0000 (02:20 +0000)
On Minerva SoC SRAM, depending on the compiler, this change improves
overall time by 4-7%.

backends/cxxrtl/cxxrtl.h
backends/cxxrtl/cxxrtl_backend.cc

index 3c315c7dffd702785f8217b2baead11f786dc9fc..0a6bcb8494039224737e1f4fd37995c601c784fe 100644 (file)
@@ -317,6 +317,14 @@ struct value : public expr_base<value<Bits>> {
                return sext_cast<NewBits>()(*this);
        }
 
+       // Bit replication is far more efficient than the equivalent concatenation.
+       template<size_t Count>
+       CXXRTL_ALWAYS_INLINE
+       value<Bits * Count> repeat() const {
+               static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
+               return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
+       }
+
        // Operations with run-time parameters (offsets, amounts, etc).
        //
        // These operations are used for computations.
index 3b2fb49856275b97187093383f4de2a79b6f69bc..916303bfe49d37f0bd491e490fb27ad70e460060 100644 (file)
@@ -832,11 +832,26 @@ struct CxxrtlWorker {
                } else if (sig.is_chunk()) {
                        return dump_sigchunk(sig.as_chunk(), is_lhs, for_debug);
                } else {
-                       dump_sigchunk(*sig.chunks().rbegin(), is_lhs, for_debug);
-                       for (auto it = sig.chunks().rbegin() + 1; it != sig.chunks().rend(); ++it) {
-                               f << ".concat(";
-                               dump_sigchunk(*it, is_lhs, for_debug);
-                               f << ")";
+                       bool first = true;
+                       auto chunks = sig.chunks();
+                       for (auto it = chunks.rbegin(); it != chunks.rend(); it++) {
+                               if (!first)
+                                       f << ".concat(";
+                               bool is_complex = dump_sigchunk(*it, is_lhs, for_debug);
+                               if (!is_lhs && it->width == 1) {
+                                       size_t repeat = 1;
+                                       while ((it + repeat) != chunks.rend() && *(it + repeat) == *it)
+                                               repeat++;
+                                       if (repeat > 1) {
+                                               if (is_complex)
+                                                       f << ".val()";
+                                               f << ".repeat<" << repeat << ">()";
+                                       }
+                                       it += repeat - 1;
+                               }
+                               if (!first)
+                                       f << ")";
+                               first = false;
                        }
                        return true;
                }