write_cxxrtl: improve writable memory handling.
authorwhitequark <whitequark@whitequark.org>
Sun, 5 Apr 2020 02:06:26 +0000 (02:06 +0000)
committerwhitequark <whitequark@whitequark.org>
Thu, 9 Apr 2020 04:08:36 +0000 (04:08 +0000)
This commit reduces space and time overhead for writable memories
to O(write port count) in both cases; implements handling for write
port priorities; and simplifies runtime representation of memories.

backends/cxxrtl/cxxrtl.cc
backends/cxxrtl/cxxrtl.h

index 710462e96e7a487892624347e4edf781434c8e54..6fd63548f7e4164d1e7ae27ad62874222cd75f32 100644 (file)
@@ -827,7 +827,7 @@ struct CxxrtlWorker {
                        }
                        RTLIL::Memory *memory = cell->module->memories[cell->getParam(ID(MEMID)).decode_string()];
                        std::string valid_index_temp = fresh_temporary();
-                       f << indent << "std::pair<bool, size_t> " << valid_index_temp << " = memory_index(";
+                       f << indent << "auto " << valid_index_temp << " = memory_index(";
                        dump_sigspec_rhs(cell->getPort(ID(ADDR)));
                        f << ", " << memory->start_offset << ", " << memory->size << ");\n";
                        if (cell->type == ID($memrd)) {
@@ -844,8 +844,8 @@ struct CxxrtlWorker {
                                // larger program) will never crash the code that calls into it.
                                //
                                // If assertions are disabled, out of bounds reads are defined to return zero.
-                               f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds read\");\n";
-                               f << indent << "if(" << valid_index_temp << ".first) {\n";
+                               f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds read\");\n";
+                               f << indent << "if(" << valid_index_temp << ".valid) {\n";
                                inc_indent();
                                        if (writable_memories[memory]) {
                                                std::string addr_temp = fresh_temporary();
@@ -854,17 +854,22 @@ struct CxxrtlWorker {
                                                f << ";\n";
                                                std::string lhs_temp = fresh_temporary();
                                                f << indent << "value<" << memory->width << "> " << lhs_temp << " = "
-                                                           << mangle(memory) << "[" << valid_index_temp << ".second].curr;\n";
-                                               for (auto memwr_cell : transparent_for[cell]) {
+                                                           << mangle(memory) << "[" << valid_index_temp << ".index];\n";
+                                               std::vector<const RTLIL::Cell*> memwr_cells(transparent_for[cell].begin(), transparent_for[cell].end());
+                                               std::sort(memwr_cells.begin(), memwr_cells.end(),
+                                                       [](const RTLIL::Cell *a, const RTLIL::Cell *b) {
+                                                               return a->getParam(ID(PRIORITY)).as_int() < b->getParam(ID(PRIORITY)).as_int();
+                                                       });
+                                               for (auto memwr_cell : memwr_cells) {
                                                        f << indent << "if (" << addr_temp << " == ";
                                                        dump_sigspec_rhs(memwr_cell->getPort(ID(ADDR)));
                                                        f << ") {\n";
                                                        inc_indent();
                                                                f << indent << lhs_temp << " = " << lhs_temp;
                                                                f << ".update(";
-                                                               dump_sigspec_rhs(memwr_cell->getPort(ID(EN)));
-                                                               f << ", ";
                                                                dump_sigspec_rhs(memwr_cell->getPort(ID(DATA)));
+                                                               f << ", ";
+                                                               dump_sigspec_rhs(memwr_cell->getPort(ID(EN)));
                                                                f << ");\n";
                                                        dec_indent();
                                                        f << indent << "}\n";
@@ -875,7 +880,7 @@ struct CxxrtlWorker {
                                        } else {
                                                f << indent;
                                                dump_sigspec_lhs(cell->getPort(ID(DATA)));
-                                               f << " = " << mangle(memory) << "[" << valid_index_temp << ".second];\n";
+                                               f << " = " << mangle(memory) << "[" << valid_index_temp << ".index];\n";
                                        }
                                dec_indent();
                                f << indent << "} else {\n";
@@ -890,22 +895,18 @@ struct CxxrtlWorker {
                                        f << indent << "}\n";
                                }
                        } else /*if (cell->type == ID($memwr))*/ {
-                               // FIXME: handle write port priority, here and above in transparent $memrd cells
                                log_assert(writable_memories[memory]);
                                // See above for rationale of having both the assert and the condition.
                                //
                                // If assertions are disabled, out of bounds writes are defined to do nothing.
-                               f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds write\");\n";
-                               f << indent << "if (" << valid_index_temp << ".first) {\n";
+                               f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds write\");\n";
+                               f << indent << "if (" << valid_index_temp << ".valid) {\n";
                                inc_indent();
-                                       std::string lhs_temp = fresh_temporary();
-                                       f << indent << "wire<" << memory->width << "> &" << lhs_temp << " = ";
-                                       f << mangle(memory) << "[" << valid_index_temp << ".second];\n";
-                                       f << indent << lhs_temp << ".next = " << lhs_temp << ".curr.update(";
-                                       dump_sigspec_rhs(cell->getPort(ID(EN)));
-                                       f << ", ";
+                                       f << indent << mangle(memory) << ".update(" << valid_index_temp << ".index, ";
                                        dump_sigspec_rhs(cell->getPort(ID(DATA)));
-                                       f << ");\n";
+                                       f << ", ";
+                                       dump_sigspec_rhs(cell->getPort(ID(EN)));
+                                       f << ", " << cell->getParam(ID(PRIORITY)).as_int() << ");\n";
                                dec_indent();
                                f << indent << "}\n";
                        }
@@ -1122,8 +1123,8 @@ struct CxxrtlWorker {
                });
 
                dump_attrs(memory);
-               f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro")
-                           << "<" << memory->width << "> " << mangle(memory)
+               f << indent << (writable_memories[memory] ? "" : "const ")
+                           << "memory<" << memory->width << "> " << mangle(memory)
                            << " { " << memory->size << "u";
                if (init_cells.empty()) {
                        f << " };\n";
@@ -1135,8 +1136,7 @@ struct CxxrtlWorker {
                                        RTLIL::Const data = cell->getPort(ID(DATA)).as_const();
                                        size_t width = cell->getParam(ID(WIDTH)).as_int();
                                        size_t words = cell->getParam(ID(WORDS)).as_int();
-                                       f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro")
-                                                   << "<" << memory->width << ">::init<" << words << "> { "
+                                       f << indent << "memory<" << memory->width << ">::init<" << words << "> { "
                                                    << stringf("%#x", cell->getPort(ID(ADDR)).as_int()) << ", {";
                                        inc_indent();
                                                for (size_t n = 0; n < words; n++) {
@@ -1257,10 +1257,7 @@ struct CxxrtlWorker {
                        for (auto memory : module->memories) {
                                if (!writable_memories[memory.second])
                                        continue;
-                               f << indent << "for (size_t i = 0; i < " << memory.second->size << "u; i++)\n";
-                               inc_indent();
-                                       f << indent << "changed |= " << mangle(memory.second) << "[i].commit();\n";
-                               dec_indent();
+                               f << indent << "changed |= " << mangle(memory.second) << ".commit();\n";
                        }
                        for (auto cell : module->cells()) {
                                if (is_internal_cell(cell->type))
index 18e45e22c35b6488aff87a81032e09134cc35817..593c31c289b61059ba5e891ce7f1fb616377e4b4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *  yosys -- Yosys Open SYnthesis Suite
  *
- *  Copyright (C) 2019  whitequark <whitequark@whitequark.org>
+ *  Copyright (C) 2019-2020  whitequark <whitequark@whitequark.org>
  *
  *  Permission to use, copy, modify, and/or distribute this software for any
  *  purpose with or without fee is hereby granted.
@@ -28,6 +28,7 @@
 #include <type_traits>
 #include <tuple>
 #include <vector>
+#include <algorithm>
 #include <sstream>
 
 // The cxxrtl support library implements compile time specialized arbitrary width arithmetics, as well as provides
@@ -73,9 +74,6 @@ struct value : public expr_base<value<Bits>> {
        template<typename... Init>
        explicit constexpr value(Init ...init) : data{init...} {}
 
-       // This allows using value<> as well as wire<> in memory initializers.
-       using init = value<Bits>;
-
        value(const value<Bits> &) = default;
        value(value<Bits> &&) = default;
        value<Bits> &operator=(const value<Bits> &) = default;
@@ -297,7 +295,7 @@ struct value : public expr_base<value<Bits>> {
                return result;
        }
 
-       value<Bits> update(const value<Bits> &mask, const value<Bits> &val) const {
+       value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
                return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
        }
 
@@ -559,19 +557,6 @@ struct wire {
        wire(wire<Bits> &&) = default;
        wire<Bits> &operator=(const wire<Bits> &) = delete;
 
-       // We want to avoid having operator=(wire<>) or operator=(value<>) that overwrites both curr and next,
-       // since this operation is almost always wrong. But we also need an operation like that for memory
-       // initialization. This is solved by adding a wrapper and making the use of operator= valid only when
-       // this wrapper is used.
-       struct init {
-               value<Bits> data;
-       };
-
-       wire<Bits> &operator=(const init &init) {
-               curr = next = init.data;
-               return *this;
-       }
-
        bool commit() {
                if (curr != next) {
                        curr = next;
@@ -587,12 +572,10 @@ std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
        return os;
 }
 
-template<class Elem>
+template<size_t Width>
 struct memory {
-       using StoredElem = typename std::remove_const<Elem>::type;
-       std::vector<StoredElem> data;
+       std::vector<value<Width>> data;
 
-       static constexpr size_t width = StoredElem::bits;
        size_t depth() const {
                return data.size();
        }
@@ -600,8 +583,8 @@ struct memory {
        memory() = delete;
        explicit memory(size_t depth) : data(depth) {}
 
-       memory(const memory<Elem> &) = delete;
-       memory<Elem> &operator=(const memory<Elem> &) = delete;
+       memory(const memory<Width> &) = delete;
+       memory<Width> &operator=(const memory<Width> &) = delete;
 
        // The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it
        // into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
@@ -610,7 +593,7 @@ struct memory {
        template<size_t Size>
        struct init {
                size_t offset;
-               typename Elem::init data[Size];
+               value<Width> data[Size];
        };
 
        template<size_t... InitSize>
@@ -621,17 +604,55 @@ struct memory {
                auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
        }
 
-       Elem &operator [](size_t index) {
+       value<Width> &operator [](size_t index) {
                assert(index < data.size());
                return data[index];
        }
-};
 
-template<size_t Width>
-using memory_rw = memory<wire<Width>>;
+       const value<Width> &operator [](size_t index) const {
+               assert(index < data.size());
+               return data[index];
+       }
 
-template<size_t Width>
-using memory_ro = memory<const value<Width>>;
+       // A simple way to make a writable memory would be to use an array of wires instead of an array of values.
+       // However, there are two significant downsides to this approach: first, it has large overhead (2× space
+       // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
+       // priorities. Although in principle write ports could be ordered or conditionally enabled in generated
+       // code based on their priorities and selected addresses, the feedback arc set problem is computationally
+       // expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer)
+       // a particular write port evaluation order.
+       //
+       // The approach used here instead is to queue writes into a buffer during the eval phase, then perform
+       // the writes during the commit phase in the priority order. This approach has low overhead, with both space
+       // and time proportional to the amount of write ports. Because virtually every memory in a practical design
+       // has at most two write ports, linear search is used on every write, being the fastest and simplest approach.
+       struct write {
+               size_t index;
+               value<Width> val;
+               value<Width> mask;
+               int priority;
+       };
+       std::vector<write> write_queue;
+
+       void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
+               assert(index < data.size());
+               write_queue.emplace_back(write { index, val, mask, priority });
+       }
+
+       bool commit() {
+               bool changed = false;
+               std::sort(write_queue.begin(), write_queue.end(),
+                       [](const write &a, const write &b) { return a.priority < b.priority; });
+               for (const write &entry : write_queue) {
+                       value<Width> elem = data[entry.index];
+                       elem = elem.update(entry.val, entry.mask);
+                       changed |= (data[entry.index] != elem);
+                       data[entry.index] = elem;
+               }
+               write_queue.clear();
+               return changed;
+       }
+};
 
 struct module {
        module() {}
@@ -1098,15 +1119,19 @@ value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
 }
 
 // Memory helper
-template<size_t BitsAddr>
-std::pair<bool, size_t> memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
-       static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
-       size_t offset_index = addr.data[0];
+struct memory_index {
+       bool valid;
+       size_t index;
 
-       bool valid = (offset_index >= offset && offset_index < offset + depth);
-       size_t index = offset_index - offset;
-       return std::make_pair(valid, index);
-}
+       template<size_t BitsAddr>
+       memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
+               static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
+               size_t offset_index = addr.data[0];
+
+               valid = (offset_index >= offset && offset_index < offset + depth);
+               index = offset_index - offset;
+       }
+};
 
 } // namespace cxxrtl_yosys