From 01e6850bd3b9c884a0ea9785ff5ff1ffd59b82e2 Mon Sep 17 00:00:00 2001 From: whitequark Date: Sun, 5 Apr 2020 02:06:26 +0000 Subject: [PATCH] write_cxxrtl: improve writable memory handling. This commit reduces space and time overhead for writable memories to O(write port count) in both cases; implements handling for write port priorities; and simplifies runtime representation of memories. --- backends/cxxrtl/cxxrtl.cc | 49 +++++++++--------- backends/cxxrtl/cxxrtl.h | 103 +++++++++++++++++++++++--------------- 2 files changed, 87 insertions(+), 65 deletions(-) diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc index 710462e96..6fd63548f 100644 --- a/backends/cxxrtl/cxxrtl.cc +++ b/backends/cxxrtl/cxxrtl.cc @@ -827,7 +827,7 @@ struct CxxrtlWorker { } RTLIL::Memory *memory = cell->module->memories[cell->getParam(ID(MEMID)).decode_string()]; std::string valid_index_temp = fresh_temporary(); - f << indent << "std::pair " << valid_index_temp << " = memory_index("; + f << indent << "auto " << valid_index_temp << " = memory_index("; dump_sigspec_rhs(cell->getPort(ID(ADDR))); f << ", " << memory->start_offset << ", " << memory->size << ");\n"; if (cell->type == ID($memrd)) { @@ -844,8 +844,8 @@ struct CxxrtlWorker { // larger program) will never crash the code that calls into it. // // If assertions are disabled, out of bounds reads are defined to return zero. - f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds read\");\n"; - f << indent << "if(" << valid_index_temp << ".first) {\n"; + f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds read\");\n"; + f << indent << "if(" << valid_index_temp << ".valid) {\n"; inc_indent(); if (writable_memories[memory]) { std::string addr_temp = fresh_temporary(); @@ -854,17 +854,22 @@ struct CxxrtlWorker { f << ";\n"; std::string lhs_temp = fresh_temporary(); f << indent << "value<" << memory->width << "> " << lhs_temp << " = " - << mangle(memory) << "[" << valid_index_temp << ".second].curr;\n"; - for (auto memwr_cell : transparent_for[cell]) { + << mangle(memory) << "[" << valid_index_temp << ".index];\n"; + std::vector memwr_cells(transparent_for[cell].begin(), transparent_for[cell].end()); + std::sort(memwr_cells.begin(), memwr_cells.end(), + [](const RTLIL::Cell *a, const RTLIL::Cell *b) { + return a->getParam(ID(PRIORITY)).as_int() < b->getParam(ID(PRIORITY)).as_int(); + }); + for (auto memwr_cell : memwr_cells) { f << indent << "if (" << addr_temp << " == "; dump_sigspec_rhs(memwr_cell->getPort(ID(ADDR))); f << ") {\n"; inc_indent(); f << indent << lhs_temp << " = " << lhs_temp; f << ".update("; - dump_sigspec_rhs(memwr_cell->getPort(ID(EN))); - f << ", "; dump_sigspec_rhs(memwr_cell->getPort(ID(DATA))); + f << ", "; + dump_sigspec_rhs(memwr_cell->getPort(ID(EN))); f << ");\n"; dec_indent(); f << indent << "}\n"; @@ -875,7 +880,7 @@ struct CxxrtlWorker { } else { f << indent; dump_sigspec_lhs(cell->getPort(ID(DATA))); - f << " = " << mangle(memory) << "[" << valid_index_temp << ".second];\n"; + f << " = " << mangle(memory) << "[" << valid_index_temp << ".index];\n"; } dec_indent(); f << indent << "} else {\n"; @@ -890,22 +895,18 @@ struct CxxrtlWorker { f << indent << "}\n"; } } else /*if (cell->type == ID($memwr))*/ { - // FIXME: handle write port priority, here and above in transparent $memrd cells log_assert(writable_memories[memory]); // See above for rationale of having both the assert and the condition. // // If assertions are disabled, out of bounds writes are defined to do nothing. - f << indent << "assert(" << valid_index_temp << ".first && \"out of bounds write\");\n"; - f << indent << "if (" << valid_index_temp << ".first) {\n"; + f << indent << "assert(" << valid_index_temp << ".valid && \"out of bounds write\");\n"; + f << indent << "if (" << valid_index_temp << ".valid) {\n"; inc_indent(); - std::string lhs_temp = fresh_temporary(); - f << indent << "wire<" << memory->width << "> &" << lhs_temp << " = "; - f << mangle(memory) << "[" << valid_index_temp << ".second];\n"; - f << indent << lhs_temp << ".next = " << lhs_temp << ".curr.update("; - dump_sigspec_rhs(cell->getPort(ID(EN))); - f << ", "; + f << indent << mangle(memory) << ".update(" << valid_index_temp << ".index, "; dump_sigspec_rhs(cell->getPort(ID(DATA))); - f << ");\n"; + f << ", "; + dump_sigspec_rhs(cell->getPort(ID(EN))); + f << ", " << cell->getParam(ID(PRIORITY)).as_int() << ");\n"; dec_indent(); f << indent << "}\n"; } @@ -1122,8 +1123,8 @@ struct CxxrtlWorker { }); dump_attrs(memory); - f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro") - << "<" << memory->width << "> " << mangle(memory) + f << indent << (writable_memories[memory] ? "" : "const ") + << "memory<" << memory->width << "> " << mangle(memory) << " { " << memory->size << "u"; if (init_cells.empty()) { f << " };\n"; @@ -1135,8 +1136,7 @@ struct CxxrtlWorker { RTLIL::Const data = cell->getPort(ID(DATA)).as_const(); size_t width = cell->getParam(ID(WIDTH)).as_int(); size_t words = cell->getParam(ID(WORDS)).as_int(); - f << indent << "memory_" << (writable_memories[memory] ? "rw" : "ro") - << "<" << memory->width << ">::init<" << words << "> { " + f << indent << "memory<" << memory->width << ">::init<" << words << "> { " << stringf("%#x", cell->getPort(ID(ADDR)).as_int()) << ", {"; inc_indent(); for (size_t n = 0; n < words; n++) { @@ -1257,10 +1257,7 @@ struct CxxrtlWorker { for (auto memory : module->memories) { if (!writable_memories[memory.second]) continue; - f << indent << "for (size_t i = 0; i < " << memory.second->size << "u; i++)\n"; - inc_indent(); - f << indent << "changed |= " << mangle(memory.second) << "[i].commit();\n"; - dec_indent(); + f << indent << "changed |= " << mangle(memory.second) << ".commit();\n"; } for (auto cell : module->cells()) { if (is_internal_cell(cell->type)) diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h index 18e45e22c..593c31c28 100644 --- a/backends/cxxrtl/cxxrtl.h +++ b/backends/cxxrtl/cxxrtl.h @@ -1,7 +1,7 @@ /* * yosys -- Yosys Open SYnthesis Suite * - * Copyright (C) 2019 whitequark + * Copyright (C) 2019-2020 whitequark * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. @@ -28,6 +28,7 @@ #include #include #include +#include #include // The cxxrtl support library implements compile time specialized arbitrary width arithmetics, as well as provides @@ -73,9 +74,6 @@ struct value : public expr_base> { template explicit constexpr value(Init ...init) : data{init...} {} - // This allows using value<> as well as wire<> in memory initializers. - using init = value; - value(const value &) = default; value(value &&) = default; value &operator=(const value &) = default; @@ -297,7 +295,7 @@ struct value : public expr_base> { return result; } - value update(const value &mask, const value &val) const { + value update(const value &val, const value &mask) const { return bit_and(mask.bit_not()).bit_or(val.bit_and(mask)); } @@ -559,19 +557,6 @@ struct wire { wire(wire &&) = default; wire &operator=(const wire &) = delete; - // We want to avoid having operator=(wire<>) or operator=(value<>) that overwrites both curr and next, - // since this operation is almost always wrong. But we also need an operation like that for memory - // initialization. This is solved by adding a wrapper and making the use of operator= valid only when - // this wrapper is used. - struct init { - value data; - }; - - wire &operator=(const init &init) { - curr = next = init.data; - return *this; - } - bool commit() { if (curr != next) { curr = next; @@ -587,12 +572,10 @@ std::ostream &operator<<(std::ostream &os, const wire &val) { return os; } -template +template struct memory { - using StoredElem = typename std::remove_const::type; - std::vector data; + std::vector> data; - static constexpr size_t width = StoredElem::bits; size_t depth() const { return data.size(); } @@ -600,8 +583,8 @@ struct memory { memory() = delete; explicit memory(size_t depth) : data(depth) {} - memory(const memory &) = delete; - memory &operator=(const memory &) = delete; + memory(const memory &) = delete; + memory &operator=(const memory &) = delete; // The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it // into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't @@ -610,7 +593,7 @@ struct memory { template struct init { size_t offset; - typename Elem::init data[Size]; + value data[Size]; }; template @@ -621,17 +604,55 @@ struct memory { auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...}; } - Elem &operator [](size_t index) { + value &operator [](size_t index) { assert(index < data.size()); return data[index]; } -}; -template -using memory_rw = memory>; + const value &operator [](size_t index) const { + assert(index < data.size()); + return data[index]; + } -template -using memory_ro = memory>; + // A simple way to make a writable memory would be to use an array of wires instead of an array of values. + // However, there are two significant downsides to this approach: first, it has large overhead (2× space + // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port + // priorities. Although in principle write ports could be ordered or conditionally enabled in generated + // code based on their priorities and selected addresses, the feedback arc set problem is computationally + // expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer) + // a particular write port evaluation order. + // + // The approach used here instead is to queue writes into a buffer during the eval phase, then perform + // the writes during the commit phase in the priority order. This approach has low overhead, with both space + // and time proportional to the amount of write ports. Because virtually every memory in a practical design + // has at most two write ports, linear search is used on every write, being the fastest and simplest approach. + struct write { + size_t index; + value val; + value mask; + int priority; + }; + std::vector write_queue; + + void update(size_t index, const value &val, const value &mask, int priority = 0) { + assert(index < data.size()); + write_queue.emplace_back(write { index, val, mask, priority }); + } + + bool commit() { + bool changed = false; + std::sort(write_queue.begin(), write_queue.end(), + [](const write &a, const write &b) { return a.priority < b.priority; }); + for (const write &entry : write_queue) { + value elem = data[entry.index]; + elem = elem.update(entry.val, entry.mask); + changed |= (data[entry.index] != elem); + data[entry.index] = elem; + } + write_queue.clear(); + return changed; + } +}; struct module { module() {} @@ -1098,15 +1119,19 @@ value mod_ss(const value &a, const value &b) { } // Memory helper -template -std::pair memory_index(const value &addr, size_t offset, size_t depth) { - static_assert(value::chunks <= 1, "memory address is too wide"); - size_t offset_index = addr.data[0]; +struct memory_index { + bool valid; + size_t index; - bool valid = (offset_index >= offset && offset_index < offset + depth); - size_t index = offset_index - offset; - return std::make_pair(valid, index); -} + template + memory_index(const value &addr, size_t offset, size_t depth) { + static_assert(value::chunks <= 1, "memory address is too wide"); + size_t offset_index = addr.data[0]; + + valid = (offset_index >= offset && offset_index < offset + depth); + index = offset_index - offset; + } +}; } // namespace cxxrtl_yosys -- 2.30.2