From 7c5dba8b77571aa6a2c0c38eeed7963b401d1cbd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marcelina=20Ko=C5=9Bcielnicka?= Date: Sun, 6 Feb 2022 10:10:21 +0100 Subject: [PATCH] Add memory_libmap pass. --- kernel/constids.inc | 12 + passes/memory/Makefile.inc | 2 + passes/memory/memlib.cc | 1101 +++++++++++++++++ passes/memory/memlib.h | 171 +++ passes/memory/memlib.md | 505 ++++++++ passes/memory/memory_libmap.cc | 2093 ++++++++++++++++++++++++++++++++ 6 files changed, 3884 insertions(+) create mode 100644 passes/memory/memlib.cc create mode 100644 passes/memory/memlib.h create mode 100644 passes/memory/memlib.md create mode 100644 passes/memory/memory_libmap.cc diff --git a/kernel/constids.inc b/kernel/constids.inc index 566b76217..d822c078b 100644 --- a/kernel/constids.inc +++ b/kernel/constids.inc @@ -29,10 +29,12 @@ X(A_SIGNED) X(A_WIDTH) X(B) X(BI) +X(BITS_USED) X(blackbox) X(B_SIGNED) X(bugpoint_keep) X(B_WIDTH) +X(BYTE) X(C) X(cells_not_processed) X(CE_OVER_SRST) @@ -116,6 +118,8 @@ X(keep_hierarchy) X(L) X(lib_whitebox) X(localparam) +X(logic_block) +X(lram) X(LUT) X(lut_keep) X(M) @@ -145,6 +149,9 @@ X(PRIORITY_MASK) X(Q) X(qwp_position) X(R) +X(ram_block) +X(ram_style) +X(ramstyle) X(RD_ADDR) X(RD_ARST) X(RD_ARST_VALUE) @@ -164,6 +171,9 @@ X(RD_TRANSPARENT) X(RD_WIDE_CONTINUATION) X(reg) X(reprocess_after) +X(rom_block) +X(rom_style) +X(romstyle) X(S) X(SET) X(SET_POLARITY) @@ -186,6 +196,8 @@ X(STATE_NUM_LOG2) X(STATE_RST) X(STATE_TABLE) X(submod) +X(syn_ramstyle) +X(syn_romstyle) X(S_WIDTH) X(T) X(TABLE) diff --git a/passes/memory/Makefile.inc b/passes/memory/Makefile.inc index 5a2c4ecfc..9a37394cf 100644 --- a/passes/memory/Makefile.inc +++ b/passes/memory/Makefile.inc @@ -9,4 +9,6 @@ OBJS += passes/memory/memory_map.o OBJS += passes/memory/memory_memx.o OBJS += passes/memory/memory_nordff.o OBJS += passes/memory/memory_narrow.o +OBJS += passes/memory/memory_libmap.o +OBJS += passes/memory/memlib.o diff --git a/passes/memory/memlib.cc b/passes/memory/memlib.cc new file mode 100644 index 000000000..8a7adc9ac --- /dev/null +++ b/passes/memory/memlib.cc @@ -0,0 +1,1101 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2021 Marcelina Kościelnicka + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "memlib.h" + +#include + +USING_YOSYS_NAMESPACE + +using namespace MemLibrary; + +PRIVATE_NAMESPACE_BEGIN + +typedef dict Options; + +struct ClockDef { + ClkPolKind kind; + std::string name; +}; + +struct RawWrTransDef { + WrTransTargetKind target_kind; + std::string target_group; + WrTransKind kind; +}; + +struct PortWidthDef { + bool tied; + std::vector wr_widths; + std::vector rd_widths; +}; + +struct SrstDef { + ResetValKind val; + SrstKind kind; + bool block_wr; +}; + +struct Empty {}; + +template struct Capability { + T val; + Options opts, portopts; + + Capability(T val, Options opts, Options portopts) : val(val), opts(opts), portopts(portopts) {} +}; + +template using Caps = std::vector>; + +struct PortGroupDef { + PortKind kind; + dict> portopts; + std::vector names; + Caps forbid; + Caps clock; + Caps clken; + Caps wrbe_separate; + Caps width; + Caps rden; + Caps rdwr; + Caps rdinit; + Caps rdarst; + Caps rdsrst; + Caps wrprio; + Caps wrtrans; + Caps optional; + Caps optional_rw; +}; + +struct WidthsDef { + std::vector widths; + WidthMode mode; +}; + +struct ResourceDef { + std::string name; + int count; +}; + +struct RamDef { + IdString id; + dict> opts; + RamKind kind; + Caps forbid; + Caps prune_rom; + Caps ports; + Caps abits; + Caps widths; + Caps resource; + Caps cost; + Caps widthscale; + Caps byte; + Caps init; + Caps style; +}; + +struct Parser { + std::string filename; + std::ifstream infile; + int line_number = 0; + Library &lib; + const pool &defines; + pool &defines_unused; + std::vector tokens; + int token_idx = 0; + bool eof = false; + + std::vector> option_stack; + std::vector> portoption_stack; + RamDef ram; + PortGroupDef port; + bool active = true; + + Parser(std::string filename, Library &lib, const pool &defines, pool &defines_unused) : filename(filename), lib(lib), defines(defines), defines_unused(defines_unused) { + // Note: this rewrites the filename we're opening, but not + // the one we're storing — this is actually correct, so that + // we keep the original filename for diagnostics. + rewrite_filename(filename); + infile.open(filename); + if (infile.fail()) { + log_error("failed to open %s\n", filename.c_str()); + } + parse(); + infile.close(); + } + + std::string peek_token() { + if (eof) + return ""; + + if (token_idx < GetSize(tokens)) + return tokens[token_idx]; + + tokens.clear(); + token_idx = 0; + + std::string line; + while (std::getline(infile, line)) { + line_number++; + for (string tok = next_token(line); !tok.empty(); tok = next_token(line)) { + if (tok[0] == '#') + break; + if (tok[tok.size()-1] == ';') { + tokens.push_back(tok.substr(0, tok.size()-1)); + tokens.push_back(";"); + } else { + tokens.push_back(tok); + } + } + if (!tokens.empty()) + return tokens[token_idx]; + } + + eof = true; + return ""; + } + + std::string get_token() { + std::string res = peek_token(); + if (!eof) + token_idx++; + return res; + } + + void eat_token(std::string expected) { + std::string token = get_token(); + if (token != expected) { + log_error("%s:%d: expected `%s`, got `%s`.\n", filename.c_str(), line_number, expected.c_str(), token.c_str()); + } + } + + IdString get_id() { + std::string token = get_token(); + if (token.empty() || (token[0] != '$' && token[0] != '\\')) { + log_error("%s:%d: expected id string, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + return IdString(token); + } + + std::string get_name() { + std::string res = get_token(); + bool valid = true; + // Basic sanity check. + if (res.empty() || (!isalpha(res[0]) && res[0] != '_')) + valid = false; + for (char c: res) + if (!isalnum(c) && c != '_') + valid = false; + if (!valid) + log_error("%s:%d: expected name, got `%s`.\n", filename.c_str(), line_number, res.c_str()); + return res; + } + + std::string get_string() { + std::string token = get_token(); + if (token.size() < 2 || token[0] != '"' || token[token.size()-1] != '"') { + log_error("%s:%d: expected string, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + return token.substr(1, token.size()-2); + } + + bool peek_string() { + std::string token = peek_token(); + return !token.empty() && token[0] == '"'; + } + + int get_int() { + std::string token = get_token(); + char *endptr; + long res = strtol(token.c_str(), &endptr, 0); + if (token.empty() || *endptr || res > INT_MAX) { + log_error("%s:%d: expected int, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + return res; + } + + double get_double() { + std::string token = get_token(); + char *endptr; + double res = strtod(token.c_str(), &endptr); + if (token.empty() || *endptr) { + log_error("%s:%d: expected float, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + return res; + } + + bool peek_int() { + std::string token = peek_token(); + return !token.empty() && isdigit(token[0]); + } + + void get_semi() { + std::string token = get_token(); + if (token != ";") { + log_error("%s:%d: expected `;`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + } + + Const get_value() { + std::string token = peek_token(); + if (!token.empty() && token[0] == '"') { + std::string s = get_string(); + return Const(s); + } else { + return Const(get_int()); + } + } + + bool enter_ifdef(bool polarity) { + bool res = active; + std::string name = get_name(); + defines_unused.erase(name); + if (active) { + if (defines.count(name)) { + active = polarity; + } else { + active = !polarity; + } + } + return res; + } + + void enter_else(bool save) { + get_token(); + active = !active && save; + } + + void enter_option() { + std::string name = get_string(); + Const val = get_value(); + if (active) { + ram.opts[name].insert(val); + } + option_stack.push_back({name, val}); + } + + void exit_option() { + option_stack.pop_back(); + } + + Options get_options() { + Options res; + for (auto it: option_stack) + res[it.first] = it.second; + return res; + } + + void enter_portoption() { + std::string name = get_string(); + Const val = get_value(); + if (active) { + port.portopts[name].insert(val); + } + portoption_stack.push_back({name, val}); + } + + void exit_portoption() { + portoption_stack.pop_back(); + } + + Options get_portoptions() { + Options res; + for (auto it: portoption_stack) + res[it.first] = it.second; + return res; + } + + template void add_cap(Caps &caps, T val) { + if (active) + caps.push_back(Capability(val, get_options(), get_portoptions())); + } + + void parse_port_block() { + if (peek_token() == "{") { + get_token(); + while (peek_token() != "}") + parse_port_item(); + get_token(); + } else { + parse_port_item(); + } + } + + void parse_ram_block() { + if (peek_token() == "{") { + get_token(); + while (peek_token() != "}") + parse_ram_item(); + get_token(); + } else { + parse_ram_item(); + } + } + + void parse_top_block() { + if (peek_token() == "{") { + get_token(); + while (peek_token() != "}") + parse_top_item(); + get_token(); + } else { + parse_top_item(); + } + } + + void parse_port_item() { + std::string token = get_token(); + if (token == "ifdef") { + bool save = enter_ifdef(true); + parse_port_block(); + if (peek_token() == "else") { + enter_else(save); + parse_port_block(); + } + active = save; + } else if (token == "ifndef") { + bool save = enter_ifdef(false); + parse_port_block(); + if (peek_token() == "else") { + enter_else(save); + parse_port_block(); + } + active = save; + } else if (token == "option") { + enter_option(); + parse_port_block(); + exit_option(); + } else if (token == "portoption") { + enter_portoption(); + parse_port_block(); + exit_portoption(); + } else if (token == "clock") { + if (port.kind == PortKind::Ar) { + log_error("%s:%d: `clock` not allowed in async read port.\n", filename.c_str(), line_number); + } + ClockDef def; + token = get_token(); + if (token == "anyedge") { + def.kind = ClkPolKind::Anyedge; + } else if (token == "posedge") { + def.kind = ClkPolKind::Posedge; + } else if (token == "negedge") { + def.kind = ClkPolKind::Negedge; + } else { + log_error("%s:%d: expected `posedge`, `negedge`, or `anyedge`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + if (peek_string()) { + def.name = get_string(); + } + get_semi(); + add_cap(port.clock, def); + } else if (token == "clken") { + if (port.kind == PortKind::Ar) { + log_error("%s:%d: `clken` not allowed in async read port.\n", filename.c_str(), line_number); + } + get_semi(); + add_cap(port.clken, {}); + } else if (token == "wrbe_separate") { + if (port.kind == PortKind::Ar || port.kind == PortKind::Sr) { + log_error("%s:%d: `wrbe_separate` not allowed in read port.\n", filename.c_str(), line_number); + } + get_semi(); + add_cap(port.wrbe_separate, {}); + } else if (token == "width") { + PortWidthDef def; + token = peek_token(); + bool is_rw = port.kind == PortKind::Srsw || port.kind == PortKind::Arsw; + if (token == "tied") { + get_token(); + if (!is_rw) + log_error("%s:%d: `tied` only makes sense for read+write ports.\n", filename.c_str(), line_number); + while (peek_int()) + def.wr_widths.push_back(get_int()); + def.tied = true; + } else if (token == "mix") { + get_token(); + if (!is_rw) + log_error("%s:%d: `mix` only makes sense for read+write ports.\n", filename.c_str(), line_number); + while (peek_int()) + def.wr_widths.push_back(get_int()); + def.rd_widths = def.wr_widths; + def.tied = false; + } else if (token == "rd") { + get_token(); + if (!is_rw) + log_error("%s:%d: `rd` only makes sense for read+write ports.\n", filename.c_str(), line_number); + do { + def.rd_widths.push_back(get_int()); + } while (peek_int()); + eat_token("wr"); + do { + def.wr_widths.push_back(get_int()); + } while (peek_int()); + def.tied = false; + } else if (token == "wr") { + get_token(); + if (!is_rw) + log_error("%s:%d: `wr` only makes sense for read+write ports.\n", filename.c_str(), line_number); + do { + def.wr_widths.push_back(get_int()); + } while (peek_int()); + eat_token("rd"); + do { + def.rd_widths.push_back(get_int()); + } while (peek_int()); + def.tied = false; + } else { + do { + def.wr_widths.push_back(get_int()); + } while (peek_int()); + def.tied = true; + } + get_semi(); + add_cap(port.width, def); + } else if (token == "rden") { + if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw) + log_error("%s:%d: `rden` only allowed on sync read ports.\n", filename.c_str(), line_number); + get_semi(); + add_cap(port.rden, {}); + } else if (token == "rdwr") { + if (port.kind != PortKind::Srsw) + log_error("%s:%d: `rdwr` only allowed on sync read+write ports.\n", filename.c_str(), line_number); + RdWrKind kind; + token = get_token(); + if (token == "undefined") { + kind = RdWrKind::Undefined; + } else if (token == "no_change") { + kind = RdWrKind::NoChange; + } else if (token == "new") { + kind = RdWrKind::New; + } else if (token == "old") { + kind = RdWrKind::Old; + } else if (token == "new_only") { + kind = RdWrKind::NewOnly; + } else { + log_error("%s:%d: expected `undefined`, `new`, `old`, `new_only`, or `no_change`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(port.rdwr, kind); + } else if (token == "rdinit") { + if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw) + log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str()); + ResetValKind kind; + token = get_token(); + if (token == "none") { + kind = ResetValKind::None; + } else if (token == "zero") { + kind = ResetValKind::Zero; + } else if (token == "any") { + kind = ResetValKind::Any; + } else if (token == "no_undef") { + kind = ResetValKind::NoUndef; + } else { + log_error("%s:%d: expected `none`, `zero`, `any`, or `no_undef`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(port.rdinit, kind); + } else if (token == "rdarst") { + if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw) + log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str()); + ResetValKind kind; + token = get_token(); + if (token == "none") { + kind = ResetValKind::None; + } else if (token == "zero") { + kind = ResetValKind::Zero; + } else if (token == "any") { + kind = ResetValKind::Any; + } else if (token == "no_undef") { + kind = ResetValKind::NoUndef; + } else if (token == "init") { + kind = ResetValKind::Init; + } else { + log_error("%s:%d: expected `none`, `zero`, `any`, `no_undef`, or `init`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(port.rdarst, kind); + } else if (token == "rdsrst") { + if (port.kind != PortKind::Sr && port.kind != PortKind::Srsw) + log_error("%s:%d: `%s` only allowed on sync read ports.\n", filename.c_str(), line_number, token.c_str()); + SrstDef def; + token = get_token(); + if (token == "none") { + def.val = ResetValKind::None; + } else if (token == "zero") { + def.val = ResetValKind::Zero; + } else if (token == "any") { + def.val = ResetValKind::Any; + } else if (token == "no_undef") { + def.val = ResetValKind::NoUndef; + } else if (token == "init") { + def.val = ResetValKind::Init; + } else { + log_error("%s:%d: expected `none`, `zero`, `any`, `no_undef`, or `init`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + if (def.val == ResetValKind::None) { + def.kind = SrstKind::None; + } else { + token = get_token(); + if (token == "ungated") { + def.kind = SrstKind::Ungated; + } else if (token == "gated_clken") { + def.kind = SrstKind::GatedClkEn; + } else if (token == "gated_rden") { + def.kind = SrstKind::GatedRdEn; + } else { + log_error("%s:%d: expected `ungated`, `gated_clken` or `gated_rden`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + } + def.block_wr = false; + if (peek_token() == "block_wr") { + get_token(); + def.block_wr = true; + } + get_semi(); + add_cap(port.rdsrst, def); + } else if (token == "wrprio") { + if (port.kind == PortKind::Ar || port.kind == PortKind::Sr) + log_error("%s:%d: `wrprio` only allowed on write ports.\n", filename.c_str(), line_number); + do { + add_cap(port.wrprio, get_string()); + } while (peek_string()); + get_semi(); + } else if (token == "wrtrans") { + if (port.kind == PortKind::Ar || port.kind == PortKind::Sr) + log_error("%s:%d: `wrtrans` only allowed on write ports.\n", filename.c_str(), line_number); + token = peek_token(); + RawWrTransDef def; + if (token == "all") { + def.target_kind = WrTransTargetKind::All; + get_token(); + } else { + def.target_kind = WrTransTargetKind::Group; + def.target_group = get_string(); + } + token = get_token(); + if (token == "new") { + def.kind = WrTransKind::New; + } else if (token == "old") { + def.kind = WrTransKind::Old; + } else { + log_error("%s:%d: expected `new` or `old`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(port.wrtrans, def); + } else if (token == "forbid") { + get_semi(); + add_cap(port.forbid, {}); + } else if (token == "optional") { + get_semi(); + add_cap(port.optional, {}); + } else if (token == "optional_rw") { + get_semi(); + add_cap(port.optional_rw, {}); + } else if (token == "") { + log_error("%s:%d: unexpected EOF while parsing port item.\n", filename.c_str(), line_number); + } else { + log_error("%s:%d: unknown port-level item `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + } + + void parse_ram_item() { + std::string token = get_token(); + if (token == "ifdef") { + bool save = enter_ifdef(true); + parse_ram_block(); + if (peek_token() == "else") { + enter_else(save); + parse_ram_block(); + } + active = save; + } else if (token == "ifndef") { + bool save = enter_ifdef(false); + parse_ram_block(); + if (peek_token() == "else") { + enter_else(save); + parse_ram_block(); + } + active = save; + } else if (token == "option") { + enter_option(); + parse_ram_block(); + exit_option(); + } else if (token == "prune_rom") { + get_semi(); + add_cap(ram.prune_rom, {}); + } else if (token == "forbid") { + get_semi(); + add_cap(ram.forbid, {}); + } else if (token == "abits") { + int val = get_int(); + if (val < 0) + log_error("%s:%d: abits %d nagative.\n", filename.c_str(), line_number, val); + get_semi(); + add_cap(ram.abits, val); + } else if (token == "width") { + WidthsDef def; + int w = get_int(); + if (w <= 0) + log_error("%s:%d: width %d not positive.\n", filename.c_str(), line_number, w); + def.widths.push_back(w); + def.mode = WidthMode::Single; + get_semi(); + add_cap(ram.widths, def); + } else if (token == "widths") { + WidthsDef def; + int last = 0; + do { + int w = get_int(); + if (w <= 0) + log_error("%s:%d: width %d not positive.\n", filename.c_str(), line_number, w); + if (w < last * 2) + log_error("%s:%d: width %d smaller than %d required for progression.\n", filename.c_str(), line_number, w, last * 2); + last = w; + def.widths.push_back(w); + } while(peek_int()); + token = get_token(); + if (token == "global") { + def.mode = WidthMode::Global; + } else if (token == "per_port") { + def.mode = WidthMode::PerPort; + } else { + log_error("%s:%d: expected `global`, or `per_port`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(ram.widths, def); + } else if (token == "resource") { + ResourceDef def; + def.name = get_string(); + if (peek_int()) + def.count = get_int(); + else + def.count = 1; + get_semi(); + add_cap(ram.resource, def); + } else if (token == "cost") { + add_cap(ram.cost, get_double()); + get_semi(); + } else if (token == "widthscale") { + if (peek_int()) { + add_cap(ram.widthscale, get_double()); + } else { + add_cap(ram.widthscale, 0.0); + } + get_semi(); + } else if (token == "byte") { + int val = get_int(); + if (val <= 0) + log_error("%s:%d: byte width %d not positive.\n", filename.c_str(), line_number, val); + add_cap(ram.byte, val); + get_semi(); + } else if (token == "init") { + MemoryInitKind kind; + token = get_token(); + if (token == "zero") { + kind = MemoryInitKind::Zero; + } else if (token == "any") { + kind = MemoryInitKind::Any; + } else if (token == "no_undef") { + kind = MemoryInitKind::NoUndef; + } else if (token == "none") { + kind = MemoryInitKind::None; + } else { + log_error("%s:%d: expected `zero`, `any`, `none`, or `no_undef`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + get_semi(); + add_cap(ram.init, kind); + } else if (token == "style") { + do { + std::string val = get_string(); + for (auto &c: val) + c = std::tolower(c); + add_cap(ram.style, val); + } while (peek_string()); + get_semi(); + } else if (token == "port") { + port = PortGroupDef(); + token = get_token(); + if (token == "ar") { + port.kind = PortKind::Ar; + } else if (token == "sr") { + port.kind = PortKind::Sr; + } else if (token == "sw") { + port.kind = PortKind::Sw; + } else if (token == "arsw") { + port.kind = PortKind::Arsw; + } else if (token == "srsw") { + port.kind = PortKind::Srsw; + } else { + log_error("%s:%d: expected `ar`, `sr`, `sw`, `arsw`, or `srsw`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + do { + port.names.push_back(get_string()); + } while (peek_string()); + parse_port_block(); + if (active) + add_cap(ram.ports, port); + } else if (token == "") { + log_error("%s:%d: unexpected EOF while parsing ram item.\n", filename.c_str(), line_number); + } else { + log_error("%s:%d: unknown ram-level item `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + } + + void parse_top_item() { + std::string token = get_token(); + if (token == "ifdef") { + bool save = enter_ifdef(true); + parse_top_block(); + if (peek_token() == "else") { + enter_else(save); + parse_top_block(); + } + active = save; + } else if (token == "ifndef") { + bool save = enter_ifdef(false); + parse_top_block(); + if (peek_token() == "else") { + enter_else(save); + parse_top_block(); + } + active = save; + } else if (token == "ram") { + int orig_line = line_number; + ram = RamDef(); + token = get_token(); + if (token == "distributed") { + ram.kind = RamKind::Distributed; + } else if (token == "block") { + ram.kind = RamKind::Block; + } else if (token == "huge") { + ram.kind = RamKind::Huge; + } else { + log_error("%s:%d: expected `distributed`, `block`, or `huge`, got `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + ram.id = get_id(); + parse_ram_block(); + if (active) { + compile_ram(orig_line); + } + } else if (token == "") { + log_error("%s:%d: unexpected EOF while parsing top item.\n", filename.c_str(), line_number); + } else { + log_error("%s:%d: unknown top-level item `%s`.\n", filename.c_str(), line_number, token.c_str()); + } + } + + bool opts_ok(const Options &def, const Options &var) { + for (auto &it: def) + if (var.at(it.first) != it.second) + return false; + return true; + } + + template const T *find_single_cap(const Caps &caps, const Options &opts, const Options &portopts, const char *name) { + const T *res = nullptr; + for (auto &cap: caps) { + if (!opts_ok(cap.opts, opts)) + continue; + if (!opts_ok(cap.portopts, portopts)) + continue; + if (res) + log_error("%s:%d: duplicate %s cap.\n", filename.c_str(), line_number, name); + res = &cap.val; + } + return res; + } + + std::vector make_opt_combinations(const dict> &opts) { + std::vector res; + res.push_back(Options()); + for (auto &it: opts) { + std::vector new_res; + for (auto &val: it.second) { + for (Options o: res) { + o[it.first] = val; + new_res.push_back(o); + } + } + res = new_res; + } + return res; + } + + void compile_portgroup(Ram &cram, PortGroupDef &pdef, dict &clk_ids, const dict &port_ids, int orig_line) { + PortGroup grp; + grp.optional = find_single_cap(pdef.optional, cram.options, Options(), "optional"); + grp.optional_rw = find_single_cap(pdef.optional_rw, cram.options, Options(), "optional_rw"); + grp.names = pdef.names; + for (auto portopts: make_opt_combinations(pdef.portopts)) { + bool forbidden = false; + for (auto &fdef: ram.forbid) { + if (opts_ok(fdef.opts, cram.options) && opts_ok(fdef.portopts, portopts)) { + forbidden = true; + } + } + if (forbidden) + continue; + PortVariant var; + var.options = portopts; + var.kind = pdef.kind; + if (pdef.kind != PortKind::Ar) { + const ClockDef *cdef = find_single_cap(pdef.clock, cram.options, portopts, "clock"); + if (!cdef) + log_error("%s:%d: missing clock capability.\n", filename.c_str(), orig_line); + var.clk_pol = cdef->kind; + if (cdef->name.empty()) { + var.clk_shared = -1; + } else { + auto it = clk_ids.find(cdef->name); + bool anyedge = cdef->kind == ClkPolKind::Anyedge; + if (it == clk_ids.end()) { + clk_ids[cdef->name] = var.clk_shared = GetSize(cram.shared_clocks); + RamClock clk; + clk.name = cdef->name; + clk.anyedge = anyedge; + cram.shared_clocks.push_back(clk); + } else { + var.clk_shared = it->second; + if (cram.shared_clocks[var.clk_shared].anyedge != anyedge) { + log_error("%s:%d: named clock \"%s\" used with both posedge/negedge and anyedge clocks.\n", filename.c_str(), orig_line, cdef->name.c_str()); + } + } + } + var.clk_en = find_single_cap(pdef.clken, cram.options, portopts, "clken"); + } + const PortWidthDef *wdef = find_single_cap(pdef.width, cram.options, portopts, "width"); + if (wdef) { + if (cram.width_mode != WidthMode::PerPort) + log_error("%s:%d: per-port width doesn't make sense for tied dbits.\n", filename.c_str(), orig_line); + compile_widths(var, cram.dbits, *wdef); + } else { + var.width_tied = true; + var.min_wr_wide_log2 = 0; + var.min_rd_wide_log2 = 0; + var.max_wr_wide_log2 = GetSize(cram.dbits) - 1; + var.max_rd_wide_log2 = GetSize(cram.dbits) - 1; + } + if (pdef.kind == PortKind::Srsw || pdef.kind == PortKind::Sr) { + const RdWrKind *rdwr = find_single_cap(pdef.rdwr, cram.options, portopts, "rdwr"); + var.rdwr = rdwr ? *rdwr : RdWrKind::Undefined; + var.rd_en = find_single_cap(pdef.rden, cram.options, portopts, "rden"); + const ResetValKind *iv = find_single_cap(pdef.rdinit, cram.options, portopts, "rdinit"); + var.rdinitval = iv ? *iv : ResetValKind::None; + const ResetValKind *arv = find_single_cap(pdef.rdarst, cram.options, portopts, "rdarst"); + var.rdarstval = arv ? *arv : ResetValKind::None; + const SrstDef *srv = find_single_cap(pdef.rdsrst, cram.options, portopts, "rdsrst"); + if (srv) { + var.rdsrstval = srv->val; + var.rdsrstmode = srv->kind; + var.rdsrst_block_wr = srv->block_wr; + if (srv->kind == SrstKind::GatedClkEn && !var.clk_en) + log_error("%s:%d: `gated_clken` used without `clken`.\n", filename.c_str(), orig_line); + if (srv->kind == SrstKind::GatedRdEn && !var.rd_en) + log_error("%s:%d: `gated_rden` used without `rden`.\n", filename.c_str(), orig_line); + } else { + var.rdsrstval = ResetValKind::None; + var.rdsrstmode = SrstKind::None; + var.rdsrst_block_wr = false; + } + if (var.rdarstval == ResetValKind::Init || var.rdsrstval == ResetValKind::Init) { + if (var.rdinitval != ResetValKind::Any && var.rdinitval != ResetValKind::NoUndef) { + log_error("%s:%d: reset value `init` has to be paired with `any` or `no_undef` initial value.\n", filename.c_str(), orig_line); + } + } + } + var.wrbe_separate = find_single_cap(pdef.wrbe_separate, cram.options, portopts, "wrbe_separate"); + if (var.wrbe_separate && cram.byte == 0) { + log_error("%s:%d: `wrbe_separate` used without `byte`.\n", filename.c_str(), orig_line); + } + for (auto &def: pdef.wrprio) { + if (!opts_ok(def.opts, cram.options)) + continue; + if (!opts_ok(def.portopts, portopts)) + continue; + var.wrprio.push_back(port_ids.at(def.val)); + } + for (auto &def: pdef.wrtrans) { + if (!opts_ok(def.opts, cram.options)) + continue; + if (!opts_ok(def.portopts, portopts)) + continue; + WrTransDef tdef; + tdef.target_kind = def.val.target_kind; + if (def.val.target_kind == WrTransTargetKind::Group) + tdef.target_group = port_ids.at(def.val.target_group); + tdef.kind = def.val.kind; + var.wrtrans.push_back(tdef); + } + grp.variants.push_back(var); + } + if (grp.variants.empty()) { + log_error("%s:%d: all port option combinations are forbidden.\n", filename.c_str(), orig_line); + } + cram.port_groups.push_back(grp); + } + + void compile_ram(int orig_line) { + if (ram.abits.empty()) + log_error("%s:%d: `dims` capability should be specified.\n", filename.c_str(), orig_line); + if (ram.widths.empty()) + log_error("%s:%d: `widths` capability should be specified.\n", filename.c_str(), orig_line); + if (ram.ports.empty()) + log_error("%s:%d: at least one port group should be specified.\n", filename.c_str(), orig_line); + for (auto opts: make_opt_combinations(ram.opts)) { + bool forbidden = false; + for (auto &fdef: ram.forbid) { + if (opts_ok(fdef.opts, opts)) { + forbidden = true; + } + } + if (forbidden) + continue; + Ram cram; + cram.id = ram.id; + cram.kind = ram.kind; + cram.options = opts; + cram.prune_rom = find_single_cap(ram.prune_rom, opts, Options(), "prune_rom"); + const int *abits = find_single_cap(ram.abits, opts, Options(), "abits"); + if (!abits) + continue; + cram.abits = *abits; + const WidthsDef *widths = find_single_cap(ram.widths, opts, Options(), "widths"); + if (!widths) + continue; + cram.dbits = widths->widths; + cram.width_mode = widths->mode; + const ResourceDef *resource = find_single_cap(ram.resource, opts, Options(), "resource"); + if (resource) { + cram.resource_name = resource->name; + cram.resource_count = resource->count; + } else { + cram.resource_count = 1; + } + cram.cost = 0; + for (auto &cap: ram.cost) { + if (opts_ok(cap.opts, opts)) + cram.cost += cap.val; + } + const double *widthscale = find_single_cap(ram.widthscale, opts, Options(), "widthscale"); + if (widthscale) + cram.widthscale = *widthscale ? *widthscale : cram.cost; + else + cram.widthscale = 0; + const int *byte = find_single_cap(ram.byte, opts, Options(), "byte"); + cram.byte = byte ? *byte : 0; + if (GetSize(cram.dbits) - 1 > cram.abits) + log_error("%s:%d: abits %d too small for dbits progression.\n", filename.c_str(), line_number, cram.abits); + validate_byte(widths->widths, cram.byte); + const MemoryInitKind *ik = find_single_cap(ram.init, opts, Options(), "init"); + cram.init = ik ? *ik : MemoryInitKind::None; + for (auto &sdef: ram.style) + if (opts_ok(sdef.opts, opts)) + cram.style.push_back(sdef.val); + dict port_ids; + int ctr = 0; + for (auto &pdef: ram.ports) { + if (!opts_ok(pdef.opts, opts)) + continue; + for (auto &name: pdef.val.names) + port_ids[name] = ctr; + ctr++; + } + dict clk_ids; + for (auto &pdef: ram.ports) { + if (!opts_ok(pdef.opts, opts)) + continue; + compile_portgroup(cram, pdef.val, clk_ids, port_ids, orig_line); + } + lib.rams.push_back(cram); + } + } + + void validate_byte(const std::vector &widths, int byte) { + if (byte == 0) + return; + if (byte >= widths.back()) + return; + if (widths[0] % byte == 0) { + for (int j = 1; j < GetSize(widths); j++) + if (widths[j] % byte != 0) + log_error("%s:%d: width progression past byte width %d is not divisible.\n", filename.c_str(), line_number, byte); + return; + } + for (int i = 0; i < GetSize(widths); i++) { + if (widths[i] == byte) { + for (int j = i + 1; j < GetSize(widths); j++) + if (widths[j] % byte != 0) + log_error("%s:%d: width progression past byte width %d is not divisible.\n", filename.c_str(), line_number, byte); + return; + } + } + log_error("%s:%d: byte width %d invalid for dbits.\n", filename.c_str(), line_number, byte); + } + + void compile_widths(PortVariant &var, const std::vector &widths, const PortWidthDef &width) { + var.width_tied = width.tied; + auto wr_widths = compile_widthdef(widths, width.wr_widths); + var.min_wr_wide_log2 = wr_widths.first; + var.max_wr_wide_log2 = wr_widths.second; + if (width.tied) { + var.min_rd_wide_log2 = wr_widths.first; + var.max_rd_wide_log2 = wr_widths.second; + } else { + auto rd_widths = compile_widthdef(widths, width.rd_widths); + var.min_rd_wide_log2 = rd_widths.first; + var.max_rd_wide_log2 = rd_widths.second; + } + } + + std::pair compile_widthdef(const std::vector &dbits, const std::vector &widths) { + if (widths.empty()) + return {0, GetSize(dbits) - 1}; + for (int i = 0; i < GetSize(dbits); i++) { + if (dbits[i] == widths[0]) { + for (int j = 0; j < GetSize(widths); j++) { + if (i+j >= GetSize(dbits) || dbits[i+j] != widths[j]) { + log_error("%s:%d: port width %d doesn't match dbits progression.\n", filename.c_str(), line_number, widths[j]); + } + } + return {i, i + GetSize(widths) - 1}; + } + } + log_error("%s:%d: port width %d invalid for dbits.\n", filename.c_str(), line_number, widths[0]); + } + + void parse() { + while (peek_token() != "") + parse_top_item(); + } +}; + +PRIVATE_NAMESPACE_END + +Library MemLibrary::parse_library(const std::vector &filenames, const pool &defines) { + Library res; + pool defines_unused = defines; + for (auto &file: filenames) { + Parser(file, res, defines, defines_unused); + } + for (auto def: defines_unused) { + log_warning("define %s not used in the library.\n", def.c_str()); + } + return res; +} diff --git a/passes/memory/memlib.h b/passes/memory/memlib.h new file mode 100644 index 000000000..c3f7728f1 --- /dev/null +++ b/passes/memory/memlib.h @@ -0,0 +1,171 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2021 Marcelina Kościelnicka + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef MEMLIB_H +#define MEMLIB_H + +#include +#include + +#include "kernel/yosys.h" + +YOSYS_NAMESPACE_BEGIN + +namespace MemLibrary { + +enum class RamKind { + Auto, + Logic, + NotLogic, + Distributed, + Block, + Huge, +}; + +enum class WidthMode { + Single, + Global, + PerPort, +}; + +enum class MemoryInitKind { + None, + Zero, + Any, + NoUndef, +}; + +enum class PortKind { + Sr, + Ar, + Sw, + Srsw, + Arsw, +}; + +enum class ClkPolKind { + Anyedge, + Posedge, + Negedge, +}; + +enum class RdWrKind { + Undefined, + NoChange, + New, + Old, + NewOnly, +}; + +enum class ResetValKind { + None, + Zero, + Any, + NoUndef, + Init, +}; + +enum class SrstKind { + None, + Ungated, + GatedClkEn, + GatedRdEn, +}; + +enum class WrTransTargetKind { + All, + Group, +}; + +enum class WrTransKind { + New, + Old, +}; + +struct WrTransDef { + WrTransTargetKind target_kind; + int target_group; + WrTransKind kind; +}; + +struct PortVariant { + dict options; + PortKind kind; + int clk_shared; + ClkPolKind clk_pol; + bool clk_en; + bool width_tied; + int min_wr_wide_log2; + int max_wr_wide_log2; + int min_rd_wide_log2; + int max_rd_wide_log2; + bool rd_en; + RdWrKind rdwr; + ResetValKind rdinitval; + ResetValKind rdarstval; + ResetValKind rdsrstval; + SrstKind rdsrstmode; + bool rdsrst_block_wr; + bool wrbe_separate; + std::vector wrprio; + std::vector wrtrans; +}; + +struct PortGroup { + bool optional; + bool optional_rw; + std::vector names; + std::vector variants; +}; + +struct RamClock { + std::string name; + bool anyedge; +}; + +struct Ram { + IdString id; + RamKind kind; + dict options; + std::vector port_groups; + bool prune_rom; + int abits; + std::vector dbits; + WidthMode width_mode; + std::string resource_name; + int resource_count; + double cost; + double widthscale; + int byte; + MemoryInitKind init; + std::vector style; + std::vector shared_clocks; +}; + +struct Library { + std::vector rams; +}; + +Library parse_library(const std::vector &filenames, const pool &defines); + +} + +YOSYS_NAMESPACE_END + +#endif diff --git a/passes/memory/memlib.md b/passes/memory/memlib.md new file mode 100644 index 000000000..fdc2d4bed --- /dev/null +++ b/passes/memory/memlib.md @@ -0,0 +1,505 @@ +# The `memory_libmap` pass + +The `memory_libmap` pass is used to map memories to hardware primitives. To work, +it needs a description of available target memories in a custom format. + + +## Basic structure + +A basic library could look like this: + + # A distributed-class RAM called $__RAM16X4SDP_ + ram distributed $__RAM16X4SDP_ { + # Has 4 address bits (ie. 16 rows). + abits 4; + # Has 4 data bits. + width 4; + # Cost for the selection heuristic. + cost 4; + # Can be initialized to any value on startup. + init any; + # Has a synchronous write port called "W"... + port sw "W" { + # ... with a positive edge clock. + clock posedge; + } + # Has an asynchronous read port called "R". + port ar "R" { + } + } + + # A block-class RAM called $__RAMB9K_ + ram block $__RAMB9K_ { + # Has 13 address bits in the base (most narrow) data width. + abits 13; + # The available widths are: + # - 1 (13 address bits) + # - 2 (12 address bits) + # - 4 (11 address bits) + # - 9 (10 address bits) + # - 18 (9 address bits) + # The width selection is per-port. + widths 1 2 4 9 18 per_port; + # Has a write enable signal with 1 bit for every 9 data bits. + byte 9; + cost 64; + init any; + # Has two synchronous read+write ports, called "A" and "B". + port srsw "A" "B" { + clock posedge; + # Has a clock enable signal (gates both read and write). + clken; + # Has three per-port selectable options for handling read+write behavior: + portoption "RDWR" "NO_CHANGE" { + # When port is writing, reading is not done (output register keeps + # its value). + rdwr no_change; + } + portoption "RDWR" "OLD" { + # When port is writing, the data read is the old value (before the + # write). + rdwr old; + } + portoption "RDWR" "NEW" { + # When port is writing, the data read is the new value. + rdwr new; + } + } + } + +The pass will automatically select between the two available cells and +the logic fallback (mapping the whole memory to LUTs+FFs) based on required +capabilities and cost function. The selected memories will be transformed +to intermediate `$__RAM16X4SDP_` and `$__RAMB9K_` cells that need to be mapped +to actual hardware cells by a `techmap` pass, while memories selected for logic +fallback will be left unmapped and will be later mopped up by `memory_map` pass. + +## RAM definition blocks + +The syntax for a RAM definition is: + + ram { + + + } + +The `` is used as the type of the mapped cell that will be passed to `techmap`. +The memory kind is one of `distributed`, `block`, or `huge`. It describes the general +class of the memory and can be matched on by manual selection attributes. + +The available ram properties are: + +- `abits
;` +- `width ;` +- `widths ... ;` +- `byte ;` +- `cost ;` +- `widthscale [];` +- `resource ;` +- `init ;` +- `style "" "" "" ...;` +- `prune_rom;` + +### RAM dimensions + +The memory dimensions are described by `abits` and `width` or `widths` properties. + +For a simple memory cell with a fixed width, use `abits` and `width` like this: + + abits 4; + width 4; + +This will result in a `2**abits × width` memory cell. + +Multiple-width memories are also possible, and use the `widths` property instead. +The rules for multiple-width memories are: + +- the widths are given in `widths` property in increasing order +- the value in the `abits` property corresponds to the most narrow width +- every width in the list needs to be greater than or equal to twice + the previous width (ie. `1 2 4 9 18` is valid, `1 2 4 7 14` is not) +- it is assumed that, for every width in progression, the word in memory + is made of two smaller words, plus optionally some extra bits (eg. in the above + list, the 9-bit word is made of two 4-bit words and 1 extra bit), and thus + each sequential width in the list corresponds to one fewer usable address bit +- all addresses connected to memory ports are always `abits` bits wide, with const + zero wired to the unused bits corresponding to wide ports + +When multiple widths are specified, they can be `per_port` or `global`. +For the `global` version, the pass has to pick one width for the whole cell, +and it is set on the resulting cell as the `WIDTH` parameter. For the `per_port` +version, the selection is made on per-port basis, and passed using `PORT_*_WIDTH` +parameters. When the mode is `per_port`, the width selection can be fine-tuned +with the port `width` property. + +Specifying dimensions is mandatory. + + +### Byte width + +If the memory cell has per-byte write enables, the `byte` property can be used +to define the byte size (ie. how many data bits correspond to one write enable +bit). + +The property is optional. If not used, it is assumed that there is a single +write enable signal for each writable port. + +The rules for this property are as follows: + +- for every available width, the width needs to be a multiple of the byte size, + or the byte size needs to be larger than the width +- if the byte size is larger than the width, the byte enable signel is assumed + to be one bit wide and cover the whole port +- otherwise, the byte enable signal has one bit for every `byte` bits of the + data port + +The exact kind of byte enable signal is determined by the presence or absence +of the per-port `wrbe_separate` property. + + +### Cost properties + +The `cost` property is used to estimate the cost of using a given mapping. +This is the cost of using one cell, and will be scaled as appropriate if +the mapping requires multiple cells. + +If the `widthscale` property is specified, the mapping is assumed to be flexible, +with cost scaling with the percentage of data width actually used. The value +of the `widthscale` property is how much of the cost is scalable as such. +If the value is omitted, all of the cost is assumed to scale. +Eg. for the following properties: + + width 14; + cost 8; + widthscale 7; + +The cost of a given cell will be assumed to be `(8 - 7) + 7 * (used_bits / 14)`. + +If `widthscale` is used, The pass will attach a `BITS_USED` parameter to mapped +calls, with a bitmask of which data bits of the memory are actually in use. +The parameter width will be the widest width in the `widths` property, and +the bit correspondence is defined accordingly. + +The `cost` property is mandatory. + + +### `init` property + +This property describes the state of the memory at initialization time. Can have +one of the following values: + +- `none`: the memory contents are unpredictable, memories requiring any sort + of initialization will not be mapped to this cell +- `zero`: the memory contents are zero, memories can be mapped to this cell iff + their initialization value is entirely zero or undef +- `any`: the memory contents can be arbitrarily selected, and the initialization + will be passes as the `INIT` parameter to the mapped cell +- `no_undef`: like `any`, but only 0 and 1 bit values are supported (the pass will + convert any x bits to 0) + +The `INIT` parameter is always constructed as a concatenation of words corresponding +to the widest available `widths` setting, so that all available memory cell bits +are covered. + +This property is optional and assumed to be `none` when not present. + + +### `style` property + +Provides a name (or names) for this definition that can be passed to the `ram_style` +or similar attribute to manually select it. Optional and can be used multiple times. + + +### `prune_rom` property + +Specifying this property disqualifies the definition from consideration for source +memories that have no write ports (ie. ROMs). Use this on definitions that have +an obviously superior read-only alternative (eg. LUTRAMs) to make the pass skip +them over quickly. + + +## Port definition blocks + +The syntax for a port group definition is: + + port "NAME 1" "NAME 2" ... { + + } + +A port group definition defines a group of ports with identical properties. +There are as many ports in a group as there are names given. + +Ports come in 5 kinds: + +- `ar`: asynchronous read port +- `sr`: synchronous read port +- `sw`: synchronous write port +- `arsw`: simultanous synchronous write + asynchronous read with common address (commonly found in LUT RAMs) +- `srsw`: synchronous write + synchronous read with common address + +The port properties available are: + +- `width ;` +- `width ...;` +- `width ...;` +- `width rd ... wr ...;` +- `clock ["SHARED_NAME"];` +- `clken;` +- `rden;` +- `wrbe_separate;` +- `rdwr ;` +- `rdinit ;` +- `rdarst ;` +- `rdsrst [block_wr];` +- `wrprio "NAME" "NAME" ...;` +- `wrtrans <"NAME"|all> ;` +- `optional;` +- `optional_rw;` + +The base signals connected to the mapped cell for ports are: + +- `PORT__ADDR`: the address +- `PORT__WR_DATA`: the write data (for `sw`/`arsw`/`srsw` ports only) +- `PORT__RD_DATA`: the read data (for `ar`/`sr`/`arsw`/`srsw` ports only) +- `PORT__WR_EN`: the write enable or enables (for `sw`/`arsw`/`srsw` ports only) + +The address is always `abits` wide. If a non-narrowest width is used, the appropriate low +bits will be tied to 0. + + +### Port `width` prooperty + +If the RAM has `per_port` widths, the available width selection can be further described +on per-port basis, by using one of the following properties: + +- `width tied;`: any width from the master `widths` list is acceptable, and + (for read+write ports) the read and write width has to be the same +- `width tied ...;`: like above, but limits the width + selection to the given list; the list has to be a contiguous sublist of the + master `widths` list +- `width ...;`: alias for the above, to be used for read-only + or write-only ports +- `width mix;`: any width from the master `widths` list is acceptable, and + read width can be different than write width (only usable for read+write ports) +- `width mix ...;`: like above, but limits the width + selection to the given list; the list has to be a contiguous sublist of the + master `widths` list +- `width rd ... wr ...;`: like above, + but the limitted selection can be different for read and write widths + +If `per_port` widths are in use and this property is not specified, `width tied;` is assumed. + +The parameters attached to the cell in `per_port` widths mode are: + +- `PORT__WIDTH`: the selected width (for `tied` ports) +- `PORT__RD_WIDTH`: the selected read width (for `mix` ports) +- `PORT__WR_WIDTH`: the selected write width (for `mix` ports) + + +### `clock` property + +The `clock` property is used with synchronous ports (and synchronous ports only). +It is mandatory for them and describes the clock polarity and clock sharing. +`anyedge` means that both polarities are supported. + +If a shared clock name is provided, the port is assumed to have a shared clock signal +with all other ports using the same shared name. Otherwise, the port is assumed to +have its own clock signal. + +The port clock is always provided on the memory cell as `PORT__CLK` signal +(even if it is also shared). Shared clocks are also provided as `CLK_` +signals. + +For `anyedge` clocks, the cell gets a `PORT__CLKPOL` parameter that is set +to 1 for `posedge` clocks and 0 for `negedge` clocks. If the clock is shared, +the same information will also be provided as `CLK__POL` parameter. + + +### `clken` and `rden` + +The `clken` property, if present, means that the port has a clock enable signal +gating both reads and writes. Such signal will be provided to the mapped cell +as `PORT__CLK_EN`. It is only applicable to synchronous ports. + +The `rden` property, if present, means that the port has a read clock enable signal. +Such signal will be provided to the mapped cell as `PORT__RD_EN`. It is only +applicable to synchronous read ports (`sr` and `srsw`). + +For `sr` ports, both of these options are effectively equivalent. + + +### `wrbe_separate` and the write enables + +The `wrbe_separate` property specifies that the write byte enables are provided +as a separate signal from the main write enable. It can only be used when the +RAM-level `byte` property is also specified. + +The rules are as follows: + +If no `byte` is specified: + +- `wrbe_separate` is not allowed +- `PORT__WR_EN` signal is single bit + +If `byte` is specified, but `wrbe_separate` is not: + +- `PORT__WR_EN` signal has one bit for every data byte +- `PORT__WR_EN_WIDTH` parameter is the width of the above (only present for multiple-width cells) + +If `byte` is specified and `wrbe_separate` is present: + +- `PORT__WR_EN` signal is single bit +- `PORT__WR_BE` signal has one bit for every data byte +- `PORT__WR_BE_WIDTH` parameter is the width of the above (only present for multiple-width cells) +- a given byte is written iff all of `CLK_EN` (if present), `WR_EN`, and the corresponding `WR_BE` bit are one + +This property can only be used on write ports. + + +### `rdwr` property + +This property is allowed only on `srsw` ports and describes read-write interactions. + +The possible values are: + +- `no_change`: if write is being performed (any bit of `WR_EN` is set), + reading is not performed and the `RD_DATA` keeps its old value +- `undefined`: all `RD_DATA` bits corresponding to enabled `WR_DATA` bits + have undefined value, remaining bits read from memory +- `old`: all `RD_DATA` bits get the previous value in memory +- `new`: all `RD_DATA` bits get the new value in memory (transparent write) +- `new_only`: all `RD_DATA` bits corresponding to enabled `WR_DATA` bits + get the new value, all others are undefined + +If this property is not found on an `srsw` port, `undefined` is assumed. + + +### Read data initial value and resets + +The `rdinit`, `rdarst`, and `rdsrst` are applicable only to synchronous read +ports. + +`rdinit` describes the initial value of the read port data, and can be set to +one of the following: + +- `none`: initial data is indeterminate +- `zero`: initial data is all-0 +- `any`: initial data is arbitrarily configurable, and the selected value + will be attached to the cell as `PORT__RD_INIT_VALUE` parameter +- `no_undef`: like `any`, but only 0 and 1 bits are allowed + +`rdarst` and `rdsrst` describe the asynchronous and synchronous reset capabilities. +The values are similar to `rdinit`: + +- `none`: no reset +- `zero`: reset to all-0 data +- `any`: reset to arbitrary value, the selected value + will be attached to the cell as `PORT__RD_ARST_VALUE` or + `PORT__RD_SRST_VALUE` parameter +- `no_undef`: like `any`, but only 0 and 1 bits are allowed +- `init`: reset to the initial value, as specified by `rdinit` (which must be `any` + or `no_undef` itself) + +If the capability is anything other than `none`, the reset signal +will be provided as `PORT__RD_ARST` or `PORT__RD_SRST`. + +For `rdsrst`, the priority must be additionally specified, as one of: + +- `ungated`: `RD_SRST` has priority over both `CLK_EN` and `RD_EN` (if present) +- `gated_clken`: `CLK_EN` has priority over `RD_SRST`; `RD_SRST` has priority over `RD_EN` if present +- `gated_rden`: `RD_EN` and `CLK_EN` (if present) both have priority over `RD_SRST` + +Also, `rdsrst` can optionally have `block_wr` specified, which means that sync reset +cannot be performed in the same cycle as a write. + +If not provided, `none` is assumed for all three properties. + + +### Write priority + +The `wrprio` property is only allowed on write ports and defines a priority relationship +between port — when `wrprio "B";` is used in definition of port `"A"`, and both ports +simultanously write to the same memory cell, the value written by port `"A"` will have +precedence. + +This property is optional, and can be used multiple times as necessary. If no relationship +is described for a pair of write ports, no priority will be assumed. + + +### Write transparency + +The `wrtrans` property is only allowed on write ports and defines behavior when +another synchronous read port reads from the memory cell at the same time as the +given port writes it. The values are: + +- `old`: the read port will get the old value of the cell +- `new`: the read port will get the new value of the cell + +This property is optional, and can be used multiple times as necessary. If no relationship +is described for a pair of ports, the value read is assumed to be indeterminate. + +Note that this property is not used to describe the read value on the port itself for `srsw` +ports — for that purpose, the `rdwr` property is used instead. + + +### Optional ports + +The `optional;` property will make the pass attach a `PORT__USED` parameter +with a boolean value specifying whether a given port was meaningfully used in +mapping a given cell. Likewise, `optional_rw;` will attach `PORT__RD_USED` +and `PORT__WR_USED` the specify whether the read / write part in particular +was used. These can be useful if the mapping has some meaningful optimization +to apply for unused ports, but doesn't otherwise influence the selection process. + + +## Options + +For highly configurable cells, multiple variants may be described in one cell description. +All properties and port definitions within a RAM or port definition can be put inside +an `option` block as follows: + + option "NAME" { + + } + +The value and name of an option are arbitrary, and the selected option value +will be provided to the cell as `OPTION_` parameter. Values can be +strings or integers. + + +Likewise, for per-port options, a `portoption` block can be used: + + portoption "NAME" { + + } + +These options will be provided as `PORT__OPTION_` parameters. + +The library parser will simply expand the RAM definition for every possible combination +of option values mentioned in the RAM body, and likewise for port definitions. +This can lead to a combinatorial explosion. + +If some option values cannot be used together, a `forbid` pseudo-property can be used +to discard a given combination, eg: + + option "ABC" 1 { + portoption "DEF" "GHI" { + forbid; + } + } + +will disallow combining the RAM option `ABC = 2` with port option `DEF = "GHI"`. + + +## Ifdefs + +To allow reusing a library for multiple FPGA families with slighly differing +capabilities, `ifdef` (and `ifndef`) blocks are provided: + + ifdef IS_FANCY_FPGA_WITH_CONFIGURABLE_ASYNC_RESET { + rdarst any; + } else { + rdarst zero; + } + +Such blocks can be enabled by passing the `-D` option to the pass. diff --git a/passes/memory/memory_libmap.cc b/passes/memory/memory_libmap.cc new file mode 100644 index 000000000..ab7bb7bb2 --- /dev/null +++ b/passes/memory/memory_libmap.cc @@ -0,0 +1,2093 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Copyright (C) 2021 Marcelina Kościelnicka + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "memlib.h" + +#include + +#include "kernel/yosys.h" +#include "kernel/sigtools.h" +#include "kernel/mem.h" +#include "kernel/qcsat.h" + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +using namespace MemLibrary; + +#define FACTOR_MUX 0.5 +#define FACTOR_DEMUX 0.5 +#define FACTOR_EMU 2 + +struct PassOptions { + bool no_auto_distributed; + bool no_auto_block; + bool no_auto_huge; + double logic_cost_rom; + double logic_cost_ram; +}; + +struct WrPortConfig { + // Index of the read port this port is merged with, or -1 if none. + int rd_port; + // Index of the PortGroup in the Ram. + int port_group; + int port_variant; + const PortVariant *def; + // Emulate priority logic for this list of (source) write port indices. + std::vector emu_prio; + // If true, this port needs to end up with uniform byte enables to work correctly. + bool force_uniform; + + WrPortConfig() : rd_port(-1), force_uniform(false) {} +}; + +struct RdPortConfig { + // Index of the write port this port is merged with, or -1 if none. + int wr_port; + // Index of the PortGroup in the Ram. + int port_group; + int port_variant; + const PortVariant *def; + // If true, this is a sync port mapped into async mem, make an output + // register. Mutually exclusive with the following options. + bool emu_sync; + // Emulate the EN / ARST / SRST / init value circuitry. + bool emu_en; + bool emu_arst; + bool emu_srst; + bool emu_init; + // Emulate EN-SRST priority. + bool emu_srst_en_prio; + // If true, use clk_en as rd_en. + bool rd_en_to_clk_en; + // Emulate transparency logic for this list of (source) write port indices. + std::vector emu_trans; + + RdPortConfig() : wr_port(-1), emu_sync(false), emu_en(false), emu_arst(false), emu_srst(false), emu_init(false), emu_srst_en_prio(false), rd_en_to_clk_en(false) {} +}; + +// The named clock and clock polarity assignments. +struct SharedClockConfig { + bool used; + SigBit clk; + // For anyedge clocks. + bool polarity; + // For non-anyedge clocks. + bool invert; +}; + +struct MemConfig { + // Reference to the library ram definition + const Ram *def; + // Port assignments, indexed by Mem port index. + std::vector wr_ports; + std::vector rd_ports; + std::vector shared_clocks; + // Emulate read-first write-read behavior using soft logic. + bool emu_read_first; + // This many low bits of (target) address are always-0 on all ports. + int base_width_log2; + int unit_width_log2; + std::vector swizzle; + int hard_wide_mask; + int emu_wide_mask; + // How many times the base memory block will need to be duplicated to get more + // data bits. + int repl_d; + // How many times the whole memory array will need to be duplicated to cover + // all read ports required. + int repl_port; + // Emulation score — how much circuitry we need to add for priority / transparency / + // reset / initial value emulation. + int score_emu; + // Mux score — how much circuitry we need to add to manually decode whatever address + // bits are not decoded by the memory array itself, for reads. + int score_mux; + // Demux score — how much circuitry we need to add to manually decode whatever address + // bits are not decoded by the memory array itself, for writes. + int score_demux; + double cost; + MemConfig() : emu_read_first(false) {} +}; + +typedef std::vector MemConfigs; + +struct MapWorker { + Module *module; + ModWalker modwalker; + SigMap sigmap; + SigMap sigmap_xmux; + FfInitVals initvals; + + MapWorker(Module *module) : module(module), modwalker(module->design, module), sigmap(module), sigmap_xmux(module), initvals(&sigmap, module) { + for (auto cell : module->cells()) + { + if (cell->type == ID($mux)) + { + RTLIL::SigSpec sig_a = sigmap_xmux(cell->getPort(ID::A)); + RTLIL::SigSpec sig_b = sigmap_xmux(cell->getPort(ID::B)); + + if (sig_a.is_fully_undef()) + sigmap_xmux.add(cell->getPort(ID::Y), sig_b); + else if (sig_b.is_fully_undef()) + sigmap_xmux.add(cell->getPort(ID::Y), sig_a); + } + } + } +}; + +struct SwizzleBit { + bool valid; + int mux_idx; + int addr; + int bit; +}; + +struct Swizzle { + int addr_shift; + int addr_start; + int addr_end; + std::vector addr_mux_bits; + std::vector> bits; +}; + +struct MemMapping { + MapWorker &worker; + QuickConeSat qcsat; + Mem &mem; + const Library &lib; + const PassOptions &opts; + std::vector cfgs; + bool logic_ok; + double logic_cost; + RamKind kind; + std::string style; + dict wr_en_cache; + dict, bool> wr_implies_rd_cache; + dict, bool> wr_excludes_rd_cache; + dict, bool> wr_excludes_srst_cache; + + MemMapping(MapWorker &worker, Mem &mem, const Library &lib, const PassOptions &opts) : worker(worker), qcsat(worker.modwalker), mem(mem), lib(lib), opts(opts) { + determine_style(); + logic_ok = determine_logic_ok(); + if (GetSize(mem.wr_ports) == 0) + logic_cost = mem.width * mem.size * opts.logic_cost_rom; + else + logic_cost = mem.width * mem.size * opts.logic_cost_ram; + if (kind == RamKind::Logic) + return; + for (int i = 0; i < GetSize(lib.rams); i++) { + auto &rdef = lib.rams[i]; + if (!check_ram_kind(rdef)) + continue; + if (!check_ram_style(rdef)) + continue; + if (!check_init(rdef)) + continue; + if (rdef.prune_rom && mem.wr_ports.empty()) + continue; + MemConfig cfg; + cfg.def = &rdef; + for (auto &cdef: rdef.shared_clocks) { + (void)cdef; + SharedClockConfig clk; + clk.used = false; + cfg.shared_clocks.push_back(clk); + } + cfgs.push_back(cfg); + } + assign_wr_ports(); + assign_rd_ports(); + handle_trans(); + // If we got this far, the memory is mappable. The following two can require emulating + // some functionality, but cannot cause the mapping to fail. + handle_priority(); + handle_rd_rst(); + score_emu_ports(); + // Now it is just a matter of picking geometry. + handle_geom(); + dump_configs(0); + prune_post_geom(); + dump_configs(1); + } + + bool addr_compatible(int wpidx, int rpidx) { + auto &wport = mem.wr_ports[wpidx]; + auto &rport = mem.rd_ports[rpidx]; + int max_wide_log2 = std::max(rport.wide_log2, wport.wide_log2); + SigSpec raddr = rport.addr.extract_end(max_wide_log2); + SigSpec waddr = wport.addr.extract_end(max_wide_log2); + int abits = std::max(GetSize(raddr), GetSize(waddr)); + raddr.extend_u0(abits); + waddr.extend_u0(abits); + return worker.sigmap_xmux(raddr) == worker.sigmap_xmux(waddr); + } + + int get_wr_en(int wpidx) { + auto it = wr_en_cache.find(wpidx); + if (it != wr_en_cache.end()) + return it->second; + int res = qcsat.ez->expression(qcsat.ez->OpOr, qcsat.importSig(mem.wr_ports[wpidx].en)); + wr_en_cache.insert({wpidx, res}); + return res; + } + + bool get_wr_implies_rd(int wpidx, int rpidx) { + auto key = std::make_pair(wpidx, rpidx); + auto it = wr_implies_rd_cache.find(key); + if (it != wr_implies_rd_cache.end()) + return it->second; + int wr_en = get_wr_en(wpidx); + int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]); + qcsat.prepare(); + bool res = !qcsat.ez->solve(wr_en, qcsat.ez->NOT(rd_en)); + wr_implies_rd_cache.insert({key, res}); + return res; + } + + bool get_wr_excludes_rd(int wpidx, int rpidx) { + auto key = std::make_pair(wpidx, rpidx); + auto it = wr_excludes_rd_cache.find(key); + if (it != wr_excludes_rd_cache.end()) + return it->second; + int wr_en = get_wr_en(wpidx); + int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]); + qcsat.prepare(); + bool res = !qcsat.ez->solve(wr_en, rd_en); + wr_excludes_rd_cache.insert({key, res}); + return res; + } + + bool get_wr_excludes_srst(int wpidx, int rpidx) { + auto key = std::make_pair(wpidx, rpidx); + auto it = wr_excludes_srst_cache.find(key); + if (it != wr_excludes_srst_cache.end()) + return it->second; + int wr_en = get_wr_en(wpidx); + int srst = qcsat.importSigBit(mem.rd_ports[rpidx].srst); + if (mem.rd_ports[rpidx].ce_over_srst) { + int rd_en = qcsat.importSigBit(mem.rd_ports[rpidx].en[0]); + srst = qcsat.ez->AND(srst, rd_en); + } + qcsat.prepare(); + bool res = !qcsat.ez->solve(wr_en, srst); + wr_excludes_srst_cache.insert({key, res}); + return res; + } + + void dump_configs(int stage); + void dump_config(MemConfig &cfg); + void determine_style(); + bool determine_logic_ok(); + bool check_ram_kind(const Ram &ram); + bool check_ram_style(const Ram &ram); + bool check_init(const Ram &ram); + void assign_wr_ports(); + void assign_rd_ports(); + void handle_trans(); + void handle_priority(); + void handle_rd_rst(); + void score_emu_ports(); + void handle_geom(); + void prune_post_geom(); + void emit_port(const MemConfig &cfg, std::vector &cells, const PortVariant &pdef, const char *name, int wpidx, int rpidx, const std::vector &hw_addr_swizzle); + void emit(const MemConfig &cfg); +}; + +void MemMapping::dump_configs(int stage) { + const char *stage_name; + switch (stage) { + case 0: + stage_name = "post-geometry"; + break; + case 1: + stage_name = "after post-geometry prune"; + break; + default: + abort(); + } + log_debug("Memory %s.%s mapping candidates (%s):\n", log_id(mem.module->name), log_id(mem.memid), stage_name); + if (logic_ok) { + log_debug("- logic fallback\n"); + log_debug(" - cost: %f\n", logic_cost); + } + for (auto &cfg: cfgs) { + dump_config(cfg); + } +} + +void MemMapping::dump_config(MemConfig &cfg) { + log_debug("- %s:\n", log_id(cfg.def->id)); + for (auto &it: cfg.def->options) + log_debug(" - option %s %s\n", it.first.c_str(), log_const(it.second)); + log_debug(" - emulation score: %d\n", cfg.score_emu); + log_debug(" - replicates (for ports): %d\n", cfg.repl_port); + log_debug(" - replicates (for data): %d\n", cfg.repl_d); + log_debug(" - mux score: %d\n", cfg.score_mux); + log_debug(" - demux score: %d\n", cfg.score_demux); + log_debug(" - cost: %f\n", cfg.cost); + std::stringstream os; + for (int x: cfg.def->dbits) + os << " " << x; + std::string dbits_s = os.str(); + log_debug(" - abits %d dbits%s\n", cfg.def->abits, dbits_s.c_str()); + if (cfg.def->byte != 0) + log_debug(" - byte width %d\n", cfg.def->byte); + log_debug(" - chosen base width %d\n", cfg.def->dbits[cfg.base_width_log2]); + os.str(""); + for (int x: cfg.swizzle) + if (x == -1) + os << " -"; + else + os << " " << x; + std::string swizzle_s = os.str(); + log_debug(" - swizzle%s\n", swizzle_s.c_str()); + os.str(""); + for (int i = 0; (1 << i) <= cfg.hard_wide_mask; i++) + if (cfg.hard_wide_mask & 1 << i) + os << " " << i; + std::string wide_s = os.str(); + if (cfg.hard_wide_mask) + log_debug(" - hard wide bits%s\n", wide_s.c_str()); + if (cfg.emu_read_first) + log_debug(" - emulate read-first behavior\n"); + for (int i = 0; i < GetSize(mem.wr_ports); i++) { + auto &pcfg = cfg.wr_ports[i]; + if (pcfg.rd_port == -1) + log_debug(" - write port %d: port group %s\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str()); + else + log_debug(" - write port %d: port group %s (shared with read port %d)\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str(), pcfg.rd_port); + + for (auto &it: pcfg.def->options) + log_debug(" - option %s %s\n", it.first.c_str(), log_const(it.second)); + if (cfg.def->width_mode == WidthMode::PerPort) { + std::stringstream os; + for (int i = pcfg.def->min_wr_wide_log2; i <= pcfg.def->max_wr_wide_log2; i++) + os << " " << cfg.def->dbits[i]; + std::string widths_s = os.str(); + const char *note = ""; + if (pcfg.rd_port != -1) + note = pcfg.def->width_tied ? " (tied)" : " (independent)"; + log_debug(" - widths%s%s\n", widths_s.c_str(), note); + } + for (auto i: pcfg.emu_prio) + log_debug(" - emulate priority over write port %d\n", i); + } + for (int i = 0; i < GetSize(mem.rd_ports); i++) { + auto &pcfg = cfg.rd_ports[i]; + if (pcfg.wr_port == -1) + log_debug(" - read port %d: port group %s\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str()); + else + log_debug(" - read port %d: port group %s (shared with write port %d)\n", i, cfg.def->port_groups[pcfg.port_group].names[0].c_str(), pcfg.wr_port); + for (auto &it: pcfg.def->options) + log_debug(" - option %s %s\n", it.first.c_str(), log_const(it.second)); + if (cfg.def->width_mode == WidthMode::PerPort) { + std::stringstream os; + for (int i = pcfg.def->min_rd_wide_log2; i <= pcfg.def->max_rd_wide_log2; i++) + os << " " << cfg.def->dbits[i]; + std::string widths_s = os.str(); + const char *note = ""; + if (pcfg.wr_port != -1) + note = pcfg.def->width_tied ? " (tied)" : " (independent)"; + log_debug(" - widths%s%s\n", widths_s.c_str(), note); + } + if (pcfg.emu_sync) + log_debug(" - emulate data register\n"); + if (pcfg.emu_en) + log_debug(" - emulate clock enable\n"); + if (pcfg.emu_arst) + log_debug(" - emulate async reset\n"); + if (pcfg.emu_srst) + log_debug(" - emulate sync reset\n"); + if (pcfg.emu_init) + log_debug(" - emulate init value\n"); + if (pcfg.emu_srst_en_prio) + log_debug(" - emulate sync reset / enable priority\n"); + for (auto i: pcfg.emu_trans) + log_debug(" - emulate transparency with write port %d\n", i); + } +} + +// Go through memory attributes to determine user-requested mapping style. +void MemMapping::determine_style() { + kind = RamKind::Auto; + style = ""; + if (mem.get_bool_attribute(ID::lram)) { + kind = RamKind::Huge; + return; + } + for (auto attr: {ID::ram_block, ID::rom_block, ID::ram_style, ID::rom_style, ID::ramstyle, ID::romstyle, ID::syn_ramstyle, ID::syn_romstyle}) { + if (mem.has_attribute(attr)) { + Const val = mem.attributes.at(attr); + if (val == 1) { + kind = RamKind::NotLogic; + return; + } + std::string val_s = val.decode_string(); + for (auto &c: val_s) + c = std::tolower(c); + if (val_s == "auto") { + // Nothing. + } else if (val_s == "logic" || val_s == "registers") { + kind = RamKind::Logic; + } else if (val_s == "distributed") { + kind = RamKind::Distributed; + } else if (val_s == "block" || val_s == "block_ram" || val_s == "ebr") { + kind = RamKind::Block; + } else if (val_s == "huge" || val_s == "ultra") { + kind = RamKind::Huge; + } else { + kind = RamKind::NotLogic; + style = val_s; + } + return; + } + } + if (mem.get_bool_attribute(ID::logic_block)) + kind = RamKind::Logic; +} + +// Determine whether the memory can be mapped entirely to soft logic. +bool MemMapping::determine_logic_ok() { + if (kind != RamKind::Auto && kind != RamKind::Logic) + return false; + // Memory is mappable entirely to soft logic iff all its write ports are in the same clock domain. + if (mem.wr_ports.empty()) + return true; + for (auto &port: mem.wr_ports) { + if (!port.clk_enable) + return false; + if (port.clk != mem.wr_ports[0].clk) + return false; + if (port.clk_polarity != mem.wr_ports[0].clk_polarity) + return false; + } + return true; +} + +// Apply RAM kind restrictions (logic/distributed/block/huge), if any. +bool MemMapping::check_ram_kind(const Ram &ram) { + if (style != "") + return true; + if (ram.kind == kind) + return true; + if (kind == RamKind::Auto || kind == RamKind::NotLogic) { + if (ram.kind == RamKind::Distributed && opts.no_auto_distributed) + return false; + if (ram.kind == RamKind::Block && opts.no_auto_block) + return false; + if (ram.kind == RamKind::Huge && opts.no_auto_huge) + return false; + return true; + } + return false; +} + +// Apply specific RAM style restrictions, if any. +bool MemMapping::check_ram_style(const Ram &ram) { + if (style == "") + return true; + for (auto &s: ram.style) + if (s == style) + return true; + return false; +} + +// Handle memory initializer restrictions, if any. +bool MemMapping::check_init(const Ram &ram) { + bool has_nonx = false; + bool has_one = false; + + for (auto &init: mem.inits) { + if (init.data.is_fully_undef()) + continue; + has_nonx = true; + for (auto bit: init.data) + if (bit == State::S1) + has_one = true; + } + + switch (ram.init) { + case MemoryInitKind::None: + return !has_nonx; + case MemoryInitKind::Zero: + return !has_one; + default: + return true; + } +} + +bool apply_clock(MemConfig &cfg, const PortVariant &def, SigBit clk, bool clk_polarity) { + if (def.clk_shared == -1) + return true; + auto &cdef = cfg.def->shared_clocks[def.clk_shared]; + auto &ccfg = cfg.shared_clocks[def.clk_shared]; + if (cdef.anyedge) { + if (!ccfg.used) { + ccfg.used = true; + ccfg.clk = clk; + ccfg.polarity = clk_polarity; + return true; + } else { + return ccfg.clk == clk && ccfg.polarity == clk_polarity; + } + } else { + bool invert = clk_polarity ^ (def.clk_pol == ClkPolKind::Posedge); + if (!ccfg.used) { + ccfg.used = true; + ccfg.clk = clk; + ccfg.invert = invert; + return true; + } else { + return ccfg.clk == clk && ccfg.invert == invert; + } + } +} + +// Perform write port assignment, validating clock options as we go. +void MemMapping::assign_wr_ports() { + for (auto &port: mem.wr_ports) { + if (!port.clk_enable) { + // Async write ports not supported. + cfgs.clear(); + return; + } + MemConfigs new_cfgs; + for (auto &cfg: cfgs) { + for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) { + auto &pg = cfg.def->port_groups[pgi]; + // Make sure the target port group still has a free port. + int used = 0; + for (auto &oport: cfg.wr_ports) + if (oport.port_group == pgi) + used++; + if (used >= GetSize(pg.names)) + continue; + for (int pvi = 0; pvi < GetSize(pg.variants); pvi++) { + auto &def = pg.variants[pvi]; + // Make sure the target is a write port. + if (def.kind == PortKind::Ar || def.kind == PortKind::Sr) + continue; + MemConfig new_cfg = cfg; + WrPortConfig pcfg; + pcfg.rd_port = -1; + pcfg.port_group = pgi; + pcfg.port_variant = pvi; + pcfg.def = &def; + if (!apply_clock(new_cfg, def, port.clk, port.clk_polarity)) + continue; + new_cfg.wr_ports.push_back(pcfg); + new_cfgs.push_back(new_cfg); + } + } + } + cfgs = new_cfgs; + } +} + +// Perform read port assignment, validating clock and rden options as we go. +void MemMapping::assign_rd_ports() { + for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) { + auto &port = mem.rd_ports[pidx]; + MemConfigs new_cfgs; + for (auto &cfg: cfgs) { + // First pass: read port not shared with a write port. + for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) { + auto &pg = cfg.def->port_groups[pgi]; + // Make sure the target port group has a port not used up by write ports. + // Overuse by other read ports is not a problem — this will just result + // in memory duplication. + int used = 0; + for (auto &oport: cfg.wr_ports) + if (oport.port_group == pgi) + used++; + if (used >= GetSize(pg.names)) + continue; + for (int pvi = 0; pvi < GetSize(pg.variants); pvi++) { + auto &def = pg.variants[pvi]; + // Make sure the target is a read port. + if (def.kind == PortKind::Sw) + continue; + // If mapping an async port, accept only async defs. + if (!port.clk_enable) { + if (def.kind == PortKind::Sr || def.kind == PortKind::Srsw) + continue; + } + MemConfig new_cfg = cfg; + RdPortConfig pcfg; + pcfg.wr_port = -1; + pcfg.port_group = pgi; + pcfg.port_variant = pvi; + pcfg.def = &def; + if (def.kind == PortKind::Sr || def.kind == PortKind::Srsw) { + pcfg.emu_sync = false; + if (!apply_clock(new_cfg, def, port.clk, port.clk_polarity)) + continue; + // Decide if rden is usable. + if (port.en != State::S1) { + if (def.clk_en) { + pcfg.rd_en_to_clk_en = true; + } else { + pcfg.emu_en = !def.rd_en; + } + } + } else { + pcfg.emu_sync = port.clk_enable; + } + new_cfg.rd_ports.push_back(pcfg); + new_cfgs.push_back(new_cfg); + } + } + // Second pass: read port shared with a write port. + for (int wpidx = 0; wpidx < GetSize(mem.wr_ports); wpidx++) { + auto &wport = mem.wr_ports[wpidx]; + auto &wpcfg = cfg.wr_ports[wpidx]; + auto &def = *wpcfg.def; + // Make sure the write port is not yet shared. + if (wpcfg.rd_port != -1) + continue; + // Make sure the target is a read port. + if (def.kind == PortKind::Sw) + continue; + // Validate address compatibility. + if (!addr_compatible(wpidx, pidx)) + continue; + // Validate clock compatibility, if needed. + if (def.kind == PortKind::Srsw) { + if (!port.clk_enable) + continue; + if (port.clk != wport.clk) + continue; + if (port.clk_polarity != wport.clk_polarity) + continue; + } + // Okay, let's fill it in. + MemConfig new_cfg = cfg; + new_cfg.wr_ports[wpidx].rd_port = pidx; + RdPortConfig pcfg; + pcfg.wr_port = wpidx; + pcfg.port_group = wpcfg.port_group; + pcfg.port_variant = wpcfg.port_variant; + pcfg.def = wpcfg.def; + pcfg.emu_sync = port.clk_enable && def.kind == PortKind::Arsw; + // For srsw, check rden capability. + if (def.kind == PortKind::Srsw) { + bool trans = port.transparency_mask[wpidx]; + bool col_x = port.collision_x_mask[wpidx]; + if (def.rdwr == RdWrKind::NoChange) { + if (!get_wr_excludes_rd(wpidx, pidx)) { + if (!trans && !col_x) + continue; + if (trans) + pcfg.emu_trans.push_back(wpidx); + new_cfg.wr_ports[wpidx].force_uniform = true; + } + if (port.en != State::S1) { + if (def.clk_en) { + pcfg.rd_en_to_clk_en = true; + } else { + pcfg.emu_en = !def.rd_en; + } + } + } else { + if (!col_x && !trans && def.rdwr != RdWrKind::Old) + continue; + if (trans) { + if (def.rdwr != RdWrKind::New && def.rdwr != RdWrKind::NewOnly) + pcfg.emu_trans.push_back(wpidx); + } + if (def.rdwr == RdWrKind::NewOnly) { + if (!get_wr_excludes_rd(wpidx, pidx)) + new_cfg.wr_ports[wpidx].force_uniform = true; + } + if (port.en != State::S1) { + if (def.clk_en) { + if (get_wr_implies_rd(wpidx, pidx)) { + pcfg.rd_en_to_clk_en = true; + } else { + pcfg.emu_en = !def.rd_en; + } + } else { + pcfg.emu_en = !def.rd_en; + } + } + } + } + new_cfg.rd_ports.push_back(pcfg); + new_cfgs.push_back(new_cfg); + } + } + cfgs = new_cfgs; + } +} + +// Validate transparency restrictions, determine where to add soft transparency logic. +void MemMapping::handle_trans() { + if (mem.emulate_read_first_ok()) { + MemConfigs new_cfgs; + for (auto &cfg: cfgs) { + new_cfgs.push_back(cfg); + bool ok = true; + // Using this trick will break read-write port sharing. + for (auto &pcfg: cfg.rd_ports) + if (pcfg.wr_port != -1) + ok = false; + if (ok) { + cfg.emu_read_first = true; + new_cfgs.push_back(cfg); + } + } + cfgs = new_cfgs; + } + for (int rpidx = 0; rpidx < GetSize(mem.rd_ports); rpidx++) { + auto &rport = mem.rd_ports[rpidx]; + if (!rport.clk_enable) + continue; + for (int wpidx = 0; wpidx < GetSize(mem.wr_ports); wpidx++) { + auto &wport = mem.wr_ports[wpidx]; + if (!wport.clk_enable) + continue; + if (rport.clk != wport.clk) + continue; + if (rport.clk_polarity != wport.clk_polarity) + continue; + // If we got this far, we have a transparency restriction + // to uphold. + MemConfigs new_cfgs; + for (auto &cfg: cfgs) { + auto &rpcfg = cfg.rd_ports[rpidx]; + auto &wpcfg = cfg.wr_ports[wpidx]; + // The transparency relation for shared ports already handled while assigning them. + if (rpcfg.wr_port == wpidx) { + new_cfgs.push_back(cfg); + continue; + } + if (rport.collision_x_mask[wpidx] && !cfg.emu_read_first) { + new_cfgs.push_back(cfg); + continue; + } + bool transparent = rport.transparency_mask[wpidx] || cfg.emu_read_first; + if (rpcfg.emu_sync) { + // For async read port, just add the transparency logic + // if necessary. + if (transparent) + rpcfg.emu_trans.push_back(wpidx); + new_cfgs.push_back(cfg); + } else { + // Otherwise, split through the relevant wrtrans caps. + // For non-transparent ports, the cap needs to be present. + // For transparent ports, we can emulate transparency + // even without a direct cap. + bool found = false; + for (auto &tdef: wpcfg.def->wrtrans) { + // Check if the target matches. + if (tdef.target_kind == WrTransTargetKind::Group && rpcfg.port_group != tdef.target_group) + continue; + // Check if the transparency kind is acceptable. + if (transparent) { + if (tdef.kind == WrTransKind::Old) + continue; + } else { + if (tdef.kind != WrTransKind::Old) + continue; + } + // Okay, we can use this cap. + new_cfgs.push_back(cfg); + found = true; + break; + } + if (!found && transparent) { + // If the port pair is transparent, but no cap was + // found, use emulation. + rpcfg.emu_trans.push_back(wpidx); + new_cfgs.push_back(cfg); + } + } + } + cfgs = new_cfgs; + } + } +} + +// Determine where to add soft priority logic. +void MemMapping::handle_priority() { + for (int p1idx = 0; p1idx < GetSize(mem.wr_ports); p1idx++) { + for (int p2idx = 0; p2idx < GetSize(mem.wr_ports); p2idx++) { + auto &port2 = mem.wr_ports[p2idx]; + if (!port2.priority_mask[p1idx]) + continue; + for (auto &cfg: cfgs) { + auto &p1cfg = cfg.rd_ports[p1idx]; + auto &p2cfg = cfg.wr_ports[p2idx]; + bool found = false; + for (auto &pgi: p2cfg.def->wrprio) { + if (pgi == p1cfg.port_group) { + found = true; + break; + } + } + // If no cap was found, emulate. + if (!found) + p2cfg.emu_prio.push_back(p1idx); + } + } + } +} + +bool is_all_zero(const Const &val) { + for (auto bit: val.bits) + if (bit == State::S1) + return false; + return true; +} + +// Determine where to add soft init value / reset logic. +void MemMapping::handle_rd_rst() { + for (auto &cfg: cfgs) { + for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) { + auto &port = mem.rd_ports[pidx]; + auto &pcfg = cfg.rd_ports[pidx]; + // Only sync ports are relevant. + // If emulated by async port or we already emulate CE, init will be + // included for free. + if (!port.clk_enable || pcfg.emu_sync || pcfg.emu_en) + continue; + switch (pcfg.def->rdinitval) { + case ResetValKind::None: + pcfg.emu_init = !port.init_value.is_fully_undef(); + break; + case ResetValKind::Zero: + pcfg.emu_init = !is_all_zero(port.init_value); + break; + default: + break; + } + Const init_val = port.init_value; + if (port.arst != State::S0) { + switch (pcfg.def->rdarstval) { + case ResetValKind::None: + pcfg.emu_arst = true; + break; + case ResetValKind::Zero: + pcfg.emu_arst = !is_all_zero(port.arst_value); + break; + case ResetValKind::Init: + if (init_val.is_fully_undef()) + init_val = port.arst_value; + pcfg.emu_arst = init_val != port.arst_value; + break; + default: + break; + } + } + if (port.srst != State::S0) { + switch (pcfg.def->rdsrstval) { + case ResetValKind::None: + pcfg.emu_srst = true; + break; + case ResetValKind::Zero: + pcfg.emu_srst = !is_all_zero(port.srst_value); + break; + case ResetValKind::Init: + if (init_val.is_fully_undef()) + init_val = port.srst_value; + pcfg.emu_srst = init_val != port.srst_value; + break; + default: + break; + } + if (!pcfg.emu_srst && pcfg.def->rdsrst_block_wr && pcfg.wr_port != -1) { + if (!get_wr_excludes_srst(pcfg.wr_port, pidx)) + pcfg.emu_srst = true; + } + if (!pcfg.emu_srst && port.en != State::S1) { + if (port.ce_over_srst) { + switch (pcfg.def->rdsrstmode) { + case SrstKind::Ungated: + pcfg.emu_srst_en_prio = true; + break; + case SrstKind::GatedClkEn: + pcfg.emu_srst_en_prio = !pcfg.rd_en_to_clk_en; + break; + case SrstKind::GatedRdEn: + break; + default: + log_assert(0); + } + } else { + switch (pcfg.def->rdsrstmode) { + case SrstKind::Ungated: + break; + case SrstKind::GatedClkEn: + if (pcfg.rd_en_to_clk_en) { + if (pcfg.def->rd_en) { + pcfg.rd_en_to_clk_en = false; + } else { + pcfg.emu_srst_en_prio = true; + } + } + break; + case SrstKind::GatedRdEn: + pcfg.emu_srst_en_prio = true; + break; + default: + log_assert(0); + } + } + } + } else { + if (pcfg.def->rd_en && pcfg.def->rdwr == RdWrKind::NoChange && pcfg.wr_port != -1) { + pcfg.rd_en_to_clk_en = false; + } + } + } + } +} + +void MemMapping::score_emu_ports() { + for (auto &cfg: cfgs) { + std::vector port_usage_wr(cfg.def->port_groups.size()); + std::vector port_usage_rd(cfg.def->port_groups.size()); + int score = 0; + // 3 points for every write port if we need to do read-first emulation. + if (cfg.emu_read_first) + score += 3 * GetSize(cfg.wr_ports); + for (auto &pcfg: cfg.wr_ports) { + // 1 point for every priority relation we need to fix up. + // This is just a gate for every distinct wren pair. + score += GetSize(pcfg.emu_prio); + port_usage_wr[pcfg.port_group]++; + } + for (auto &pcfg: cfg.rd_ports) { + // 3 points for every soft transparency logic instance. This involves + // registers and other major mess. + score += 3 * GetSize(pcfg.emu_trans); + // 3 points for CE soft logic. Likewise involves registers. + // If we already do this, subsumes any init/srst/arst emulation. + if (pcfg.emu_en) + score += 3; + // 2 points for soft init value / reset logic: involves single bit + // register and some muxes. + if (pcfg.emu_init) + score += 2; + if (pcfg.emu_arst) + score += 2; + if (pcfg.emu_srst) + score += 2; + // 1 point for wrong srst/en priority (fixed with a single gate). + if (pcfg.emu_srst_en_prio) + score++; + // 1 point for every non-shared read port used, as a tiebreaker + // to prefer single-port configs. + if (pcfg.wr_port == -1) { + score++; + port_usage_rd[pcfg.port_group]++; + } + } + cfg.score_emu = score; + int repl_port = 1; + for (int i = 0; i < GetSize(cfg.def->port_groups); i++) { + int space = GetSize(cfg.def->port_groups[i].names) - port_usage_wr[i]; + log_assert(space >= 0); + if (port_usage_rd[i] > 0) { + log_assert(space > 0); + int usage = port_usage_rd[i]; + int cur = (usage + space - 1) / space; + if (cur > repl_port) + repl_port = cur; + } + } + cfg.repl_port = repl_port; + } +} + +void MemMapping::handle_geom() { + std::vector wren_size; + for (auto &port: mem.wr_ports) { + SigSpec en = port.en; + en.sort_and_unify(); + wren_size.push_back(GetSize(en)); + } + for (auto &cfg: cfgs) { + // First, create a set of "byte boundaries": the bit positions in source memory word + // that have write enable different from the previous bit in any write port. + // Bit 0 is considered to be a byte boundary as well. + // Likewise, create a set of "word boundaries" that are like above, but only for write ports + // with the "force uniform" flag set. + std::vector byte_boundary(mem.width, false); + std::vector word_boundary(mem.width, false); + byte_boundary[0] = true; + for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) { + auto &port = mem.wr_ports[pidx]; + auto &pcfg = cfg.wr_ports[pidx]; + if (pcfg.force_uniform) + word_boundary[0] = true; + for (int sub = 0; sub < (1 << port.wide_log2); sub++) { + for (int i = 1; i < mem.width; i++) { + int pos = sub * mem.width + i; + if (port.en[pos] != port.en[pos-1]) { + byte_boundary[i] = true; + if (pcfg.force_uniform) + word_boundary[i] = true; + } + } + } + } + bool got_config = false; + int best_cost = 0; + int byte_width_log2 = 0; + for (int i = 0; i < GetSize(cfg.def->dbits); i++) + if (cfg.def->byte >= cfg.def->dbits[i]) + byte_width_log2 = i; + if (cfg.def->byte == 0) + byte_width_log2 = GetSize(cfg.def->dbits) - 1; + pool no_wide_bits; + // Determine which of the source address bits involved in wide ports + // are "uniform". Bits are considered uniform if, when a port is widened through + // them, the write enables are the same for both values of the bit. + int max_wr_wide_log2 = 0; + for (auto &port: mem.wr_ports) + if (port.wide_log2 > max_wr_wide_log2) + max_wr_wide_log2 = port.wide_log2; + int max_wide_log2 = max_wr_wide_log2; + for (auto &port: mem.rd_ports) + if (port.wide_log2 > max_wide_log2) + max_wide_log2 = port.wide_log2; + int wide_nu_start = max_wide_log2; + int wide_nu_end = max_wr_wide_log2; + for (int i = 0; i < GetSize(mem.wr_ports); i++) { + auto &port = mem.wr_ports[i]; + auto &pcfg = cfg.wr_ports[i]; + for (int j = 0; j < port.wide_log2; j++) { + bool uniform = true; + // If write enables don't match, mark bit as non-uniform. + for (int k = 0; k < (1 << port.wide_log2); k += 2 << j) + if (port.en.extract(k * mem.width, mem.width << j) != port.en.extract((k + (1 << j)) * mem.width, mem.width << j)) + uniform = false; + if (!uniform) { + if (pcfg.force_uniform) { + for (int k = j; k < port.wide_log2; k++) + no_wide_bits.insert(k); + } + if (j < wide_nu_start) + wide_nu_start = j; + break; + } + } + if (pcfg.def->width_tied && pcfg.rd_port != -1) { + // If: + // + // - the write port is merged with a read port + // - the read port is wider than the write port + // - read and write widths are tied + // + // then we will have to artificially widen the write + // port to the width of the read port, and emulate + // a narrower write path by use of write enables, + // which will definitely be non-uniform over the added + // bits. + auto &rport = mem.rd_ports[pcfg.rd_port]; + if (rport.wide_log2 > port.wide_log2) { + if (port.wide_log2 < wide_nu_start) + wide_nu_start = port.wide_log2; + if (rport.wide_log2 > wide_nu_end) + wide_nu_end = rport.wide_log2; + if (pcfg.force_uniform) { + for (int k = port.wide_log2; k < rport.wide_log2; k++) + no_wide_bits.insert(k); + } + } + } + } + // Iterate over base widths. + for (int base_width_log2 = 0; base_width_log2 < GetSize(cfg.def->dbits); base_width_log2++) { + // Now, see how many data bits we actually have available. + // This is usually dbits[base_width_log2], but could be smaller if we + // ran afoul of a max width limitation. Configurations where this + // happens are not useful, unless we need it to satisfy a *minimum* + // width limitation. + int unit_width_log2 = base_width_log2; + for (auto &pcfg: cfg.wr_ports) + if (unit_width_log2 > pcfg.def->max_wr_wide_log2) + unit_width_log2 = pcfg.def->max_wr_wide_log2; + for (auto &pcfg: cfg.rd_ports) + if (unit_width_log2 > pcfg.def->max_rd_wide_log2) + unit_width_log2 = pcfg.def->max_rd_wide_log2; + if (unit_width_log2 != base_width_log2 && got_config) + break; + int unit_width = cfg.def->dbits[unit_width_log2]; + // Also determine effective byte width (the granularity of write enables). + int effective_byte = cfg.def->byte; + if (effective_byte == 0 || effective_byte > unit_width) + effective_byte = unit_width; + if (mem.wr_ports.empty()) + effective_byte = 1; + log_assert(unit_width % effective_byte == 0); + // Create the swizzle pattern. + std::vector swizzle; + for (int i = 0; i < mem.width; i++) { + if (word_boundary[i]) + while (GetSize(swizzle) % unit_width) + swizzle.push_back(-1); + else if (byte_boundary[i]) + while (GetSize(swizzle) % effective_byte) + swizzle.push_back(-1); + swizzle.push_back(i); + } + if (word_boundary[0]) + while (GetSize(swizzle) % unit_width) + swizzle.push_back(-1); + else + while (GetSize(swizzle) % effective_byte) + swizzle.push_back(-1); + // Now evaluate the configuration, then keep adding more hard wide bits + // and evaluating. + int hard_wide_mask = 0; + int hard_wide_num = 0; + bool byte_failed = false; + while (1) { + // Check if all min width constraints are satisfied. + // Only check these constraints for write ports with width below + // byte width — for other ports, we can emulate narrow width with + // a larger one. + bool min_width_ok = true; + int min_width_bit = wide_nu_start; + for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) { + auto &port = mem.wr_ports[pidx]; + int w = base_width_log2; + for (int i = 0; i < port.wide_log2; i++) + if (hard_wide_mask & 1 << i) + w++; + if (w < cfg.wr_ports[pidx].def->min_wr_wide_log2 && w < byte_width_log2) { + min_width_ok = false; + if (min_width_bit > port.wide_log2) + min_width_bit = port.wide_log2; + } + } + if (min_width_ok) { + int emu_wide_bits = max_wide_log2 - hard_wide_num; + int mult_wide = 1 << emu_wide_bits; + int addrs = 1 << (cfg.def->abits - base_width_log2 + emu_wide_bits); + int min_addr = mem.start_offset / addrs; + int max_addr = (mem.start_offset + mem.size - 1) / addrs; + int mult_a = max_addr - min_addr + 1; + int bits = mult_a * mult_wide * GetSize(swizzle); + int repl = (bits + unit_width - 1) / unit_width; + int score_demux = 0; + for (int i = 0; i < GetSize(mem.wr_ports); i++) { + auto &port = mem.wr_ports[i]; + int w = emu_wide_bits; + for (int i = 0; i < port.wide_log2; i++) + if (!(hard_wide_mask & 1 << i)) + w--; + if (w || mult_a != 1) + score_demux += (mult_a << w) * wren_size[i]; + } + int score_mux = 0; + for (auto &port: mem.rd_ports) { + int w = emu_wide_bits; + for (int i = 0; i < port.wide_log2; i++) + if (!(hard_wide_mask & 1 << i)) + w--; + score_mux += ((mult_a << w) - 1) * GetSize(port.data); + } + double cost = (cfg.def->cost - cfg.def->widthscale) * repl * cfg.repl_port; + cost += cfg.def->widthscale * mult_a * mult_wide * mem.width / unit_width * cfg.repl_port; + cost += score_mux * FACTOR_MUX; + cost += score_demux * FACTOR_DEMUX; + cost += cfg.score_emu * FACTOR_EMU; + if (!got_config || cost < best_cost) { + cfg.base_width_log2 = base_width_log2; + cfg.unit_width_log2 = unit_width_log2; + cfg.swizzle = swizzle; + cfg.hard_wide_mask = hard_wide_mask; + cfg.emu_wide_mask = ((1 << max_wide_log2) - 1) & ~hard_wide_mask; + cfg.repl_d = repl; + cfg.score_demux = score_demux; + cfg.score_mux = score_mux; + cfg.cost = cost; + best_cost = cost; + got_config = true; + } + } + if (cfg.def->width_mode != WidthMode::PerPort) + break; + // Now, pick the next bit to add to the hard wide mask. +next_hw: + int scan_from; + int scan_to; + bool retry = false; + if (!min_width_ok) { + // If we still haven't met the minimum width limits, + // add the highest one that will be useful for working + // towards all unmet limits. + scan_from = min_width_bit; + scan_to = 0; + // If the relevant write port is not wide, it's impossible. + } else if (byte_failed) { + // If we already failed with uniformly-written bits only, + // go with uniform bits that are only involved in reads. + scan_from = max_wide_log2; + scan_to = wide_nu_end; + } else if (base_width_log2 + hard_wide_num < byte_width_log2) { + // If we still need uniform bits, prefer the low ones. + scan_from = wide_nu_start; + scan_to = 0; + retry = true; + } else { + scan_from = max_wide_log2; + scan_to = 0; + } + int bit = scan_from - 1; + while (1) { + if (bit < scan_to) { +hw_bit_failed: + if (retry) { + byte_failed = true; + goto next_hw; + } else { + goto bw_done; + } + } + if (!(hard_wide_mask & 1 << bit) && !no_wide_bits.count(bit)) + break; + bit--; + } + int new_hw_mask = hard_wide_mask | 1 << bit; + // Check if all max width constraints are satisfied. + for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) { + auto &port = mem.wr_ports[pidx]; + int w = base_width_log2; + for (int i = 0; i < port.wide_log2; i++) + if (new_hw_mask & 1 << i) + w++; + if (w > cfg.wr_ports[pidx].def->max_wr_wide_log2) { + goto hw_bit_failed; + } + } + for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) { + auto &port = mem.rd_ports[pidx]; + int w = base_width_log2; + for (int i = 0; i < port.wide_log2; i++) + if (new_hw_mask & 1 << i) + w++; + if (w > cfg.rd_ports[pidx].def->max_rd_wide_log2) { + goto hw_bit_failed; + } + } + // Bit ok, commit. + hard_wide_mask = new_hw_mask; + hard_wide_num++; + } +bw_done:; + } + log_assert(got_config); + } +} + +void MemMapping::prune_post_geom() { + std::vector keep; + dict rsrc; + for (int i = 0; i < GetSize(cfgs); i++) { + auto &cfg = cfgs[i]; + std::string key = cfg.def->resource_name; + if (key.empty()) { + switch (cfg.def->kind) { + case RamKind::Distributed: + key = "[distributed]"; + break; + case RamKind::Block: + key = "[block]"; + break; + case RamKind::Huge: + key = "[huge]"; + break; + default: + break; + } + } + auto it = rsrc.find(key); + if (it == rsrc.end()) { + rsrc[key] = i; + keep.push_back(true); + } else { + auto &ocfg = cfgs[it->second]; + if (cfg.cost < ocfg.cost) { + keep[it->second] = false; + it->second = i; + keep.push_back(true); + } else { + keep.push_back(false); + } + } + } + MemConfigs new_cfgs; + for (int i = 0; i < GetSize(cfgs); i++) + if (keep[i]) + new_cfgs.push_back(cfgs[i]); + cfgs = new_cfgs; +} + +Swizzle gen_swizzle(const Mem &mem, const MemConfig &cfg, int sw_wide_log2, int hw_wide_log2) { + Swizzle res; + + std::vector emu_wide_bits; + std::vector hard_wide_bits; + for (int i = 0; i < ceil_log2(mem.size); i++) { + if (cfg.emu_wide_mask & 1 << i) + emu_wide_bits.push_back(i); + else if (GetSize(hard_wide_bits) < hw_wide_log2 - cfg.base_width_log2) + hard_wide_bits.push_back(i); + } + for (int x : hard_wide_bits) + if (x >= sw_wide_log2) + res.addr_mux_bits.push_back(x); + for (int x : emu_wide_bits) + if (x >= sw_wide_log2) + res.addr_mux_bits.push_back(x); + + res.addr_shift = cfg.def->abits - cfg.base_width_log2 + GetSize(emu_wide_bits); + res.addr_start = mem.start_offset & ~((1 << res.addr_shift) - 1); + res.addr_end = ((mem.start_offset + mem.size - 1) | ((1 << res.addr_shift) - 1)) + 1; + int hnum = (res.addr_end - res.addr_start) >> res.addr_shift; + int unit_width = cfg.def->dbits[cfg.unit_width_log2]; + + for (int rd = 0; rd < cfg.repl_d; rd++) { + std::vector bits(cfg.def->dbits[hw_wide_log2]); + for (auto &bit: bits) + bit.valid = false; + res.bits.push_back(bits); + } + + for (int hi = 0; hi < hnum; hi++) { + for (int ewi = 0; ewi < (1 << GetSize(emu_wide_bits)); ewi++) { + for (int hwi = 0; hwi < (1 << GetSize(hard_wide_bits)); hwi++) { + int mux_idx = 0; + int sub = 0; + int mib = 0; + int hbit_base = 0; + for (int i = 0; i < GetSize(hard_wide_bits); i++) { + if (hard_wide_bits[i] < sw_wide_log2) { + if (hwi & 1 << i) + sub |= 1 << hard_wide_bits[i]; + } else { + if (hwi & 1 << i) + mux_idx |= 1 << mib; + mib++; + } + if (hwi & 1 << i) + hbit_base += cfg.def->dbits[i + cfg.base_width_log2]; + } + for (int i = 0; i < GetSize(emu_wide_bits); i++) { + if (emu_wide_bits[i] < sw_wide_log2) { + if (ewi & 1 << i) + sub |= 1 << emu_wide_bits[i]; + } else { + if (ewi & 1 << i) + mux_idx |= 1 << mib; + mib++; + } + } + mux_idx |= hi << mib; + int addr = res.addr_start + (hi << res.addr_shift); + for (int i = 0; i < GetSize(res.addr_mux_bits); i++) + if (mux_idx & 1 << i) + addr += 1 << res.addr_mux_bits[i]; + for (int bit = 0; bit < GetSize(cfg.swizzle); bit++) { + if (cfg.swizzle[bit] == -1) + continue; + int rbit = bit + GetSize(cfg.swizzle) * (ewi + (hi << GetSize(emu_wide_bits))); + int rep = rbit / unit_width; + int hbit = hbit_base + rbit % unit_width; + auto &swz = res.bits[rep][hbit]; + swz.valid = true; + swz.addr = addr; + swz.mux_idx = mux_idx; + swz.bit = cfg.swizzle[bit] + sub * mem.width; + } + } + } + } + + return res; +} + +void clean_undef(std::vector &val) { + for (auto &bit : val) + if (bit != State::S1) + bit = State::S0; +} + +std::vector generate_demux(Mem &mem, int wpidx, const Swizzle &swz) { + auto &port = mem.wr_ports[wpidx]; + std::vector res; + int hi_bits = ceil_log2(swz.addr_end - swz.addr_start) - swz.addr_shift; + auto compressed = port.compress_en(); + SigSpec sig_a = compressed.first; + SigSpec addr = port.addr; + if (GetSize(addr) > hi_bits + swz.addr_shift) { + int lo = mem.start_offset; + int hi = mem.start_offset + mem.size; + int bits = ceil_log2(hi); + for (int i = 0; i < bits; i++) { + int new_lo = lo; + if (lo & 1 << i) + new_lo -= 1 << i; + int new_hi = hi; + if (hi & 1 << i) + new_hi += 1 << i; + if (new_hi - new_lo > (1 << (hi_bits + swz.addr_shift))) + break; + lo = new_lo; + hi = new_hi; + } + SigSpec in_range = mem.module->And(NEW_ID, mem.module->Ge(NEW_ID, addr, lo), mem.module->Lt(NEW_ID, addr, hi)); + sig_a = mem.module->Mux(NEW_ID, Const(State::S0, GetSize(sig_a)), sig_a, in_range); + } + addr.extend_u0(swz.addr_shift + hi_bits, false); + SigSpec sig_s; + for (int x : swz.addr_mux_bits) + sig_s.append(addr[x]); + for (int i = 0; i < hi_bits; i++) + sig_s.append(addr[swz.addr_shift + i]); + SigSpec sig_y; + if (GetSize(sig_s) == 0) + sig_y = sig_a; + else + sig_y = mem.module->Demux(NEW_ID, sig_a, sig_s); + for (int i = 0; i < ((swz.addr_end - swz.addr_start) >> swz.addr_shift); i++) { + for (int j = 0; j < (1 << GetSize(swz.addr_mux_bits)); j++) { + int hi = ((swz.addr_start >> swz.addr_shift) + i) & ((1 << hi_bits) - 1); + int pos = (hi << GetSize(swz.addr_mux_bits) | j) * GetSize(sig_a); + res.push_back(port.decompress_en(compressed.second, sig_y.extract(pos, GetSize(sig_a)))); + } + } + return res; +} + +std::vector generate_mux(Mem &mem, int rpidx, const Swizzle &swz) { + auto &port = mem.rd_ports[rpidx]; + std::vector res; + int hi_bits = ceil_log2(swz.addr_end - swz.addr_start) - swz.addr_shift; + SigSpec sig_s; + SigSpec addr = port.addr; + addr.extend_u0(swz.addr_shift + hi_bits, false); + for (int x : swz.addr_mux_bits) + sig_s.append(addr[x]); + for (int i = 0; i < hi_bits; i++) + sig_s.append(addr[swz.addr_shift + i]); + if (GetSize(sig_s) == 0) { + return {port.data}; + } + if (port.clk_enable) { + SigSpec new_sig_s = mem.module->addWire(NEW_ID, GetSize(sig_s)); + mem.module->addDffe(NEW_ID, port.clk, port.en, sig_s, new_sig_s, port.clk_polarity); + sig_s = new_sig_s; + } + SigSpec sig_a = Const(State::Sx, GetSize(port.data) << hi_bits << GetSize(swz.addr_mux_bits)); + for (int i = 0; i < ((swz.addr_end - swz.addr_start) >> swz.addr_shift); i++) { + for (int j = 0; j < (1 << GetSize(swz.addr_mux_bits)); j++) { + SigSpec sig = mem.module->addWire(NEW_ID, GetSize(port.data)); + int hi = ((swz.addr_start >> swz.addr_shift) + i) & ((1 << hi_bits) - 1); + int pos = (hi << GetSize(swz.addr_mux_bits) | j) * GetSize(port.data); + for (int k = 0; k < GetSize(port.data); k++) + sig_a[pos + k] = sig[k]; + res.push_back(sig); + } + } + mem.module->addBmux(NEW_ID, sig_a, sig_s, port.data); + return res; +} + +void MemMapping::emit_port(const MemConfig &cfg, std::vector &cells, const PortVariant &pdef, const char *name, int wpidx, int rpidx, const std::vector &hw_addr_swizzle) { + for (auto &it: pdef.options) + for (auto cell: cells) + cell->setParam(stringf("\\PORT_%s_OPTION_%s", name, it.first.c_str()), it.second); + SigSpec addr = Const(State::Sx, cfg.def->abits); + int wide_log2 = 0, wr_wide_log2 = 0, rd_wide_log2 = 0; + SigSpec clk = State::S0; + SigSpec clk_en = State::S0; + bool clk_pol = true; + if (wpidx != -1) { + auto &wport = mem.wr_ports[wpidx]; + clk = wport.clk; + clk_pol = wport.clk_polarity; + addr = wport.addr; + wide_log2 = wr_wide_log2 = wport.wide_log2; + if (rpidx != -1) { + auto &rport = mem.rd_ports[rpidx]; + auto &rpcfg = cfg.rd_ports[rpidx]; + rd_wide_log2 = rport.wide_log2; + if (rd_wide_log2 > wr_wide_log2) + wide_log2 = rd_wide_log2; + else + addr = rport.addr; + if (pdef.clk_en) { + if (rpcfg.rd_en_to_clk_en) { + if (pdef.rdwr == RdWrKind::NoChange) { + clk_en = mem.module->Or(NEW_ID, rport.en, mem.module->ReduceOr(NEW_ID, wport.en)); + } else { + clk_en = rport.en; + } + } else { + clk_en = State::S1; + } + } + } else { + if (pdef.clk_en) + clk_en = State::S1; + } + } else if (rpidx != -1) { + auto &rport = mem.rd_ports[rpidx]; + auto &rpcfg = cfg.rd_ports[rpidx]; + if (rport.clk_enable) { + clk = rport.clk; + clk_pol = rport.clk_polarity; + } + addr = rport.addr; + wide_log2 = rd_wide_log2 = rport.wide_log2; + if (pdef.clk_en) { + if (rpcfg.rd_en_to_clk_en) + clk_en = rport.en; + else + clk_en = State::S1; + } + + } + addr = worker.sigmap_xmux(addr); + if (pdef.kind != PortKind::Ar) { + switch (pdef.clk_pol) { + case ClkPolKind::Posedge: + if (!clk_pol) + clk = mem.module->Not(NEW_ID, clk); + break; + case ClkPolKind::Negedge: + if (clk_pol) + clk = mem.module->Not(NEW_ID, clk); + break; + case ClkPolKind::Anyedge: + for (auto cell: cells) + cell->setParam(stringf("\\PORT_%s_CLK_POL", name), clk_pol); + } + for (auto cell: cells) { + cell->setPort(stringf("\\PORT_%s_CLK", name), clk); + if (pdef.clk_en) + cell->setPort(stringf("\\PORT_%s_CLK_EN", name), clk_en); + } + } + + // Width determination. + if (pdef.width_tied) { + rd_wide_log2 = wr_wide_log2 = wide_log2; + } + int hw_wr_wide_log2 = cfg.base_width_log2; + for (int i = 0; i < wr_wide_log2; i++) + if (cfg.hard_wide_mask & (1 << i)) + hw_wr_wide_log2++; + if (hw_wr_wide_log2 < pdef.min_wr_wide_log2) + hw_wr_wide_log2 = pdef.min_wr_wide_log2; + if (hw_wr_wide_log2 > pdef.max_wr_wide_log2) + hw_wr_wide_log2 = pdef.max_wr_wide_log2; + int hw_rd_wide_log2 = cfg.base_width_log2; + for (int i = 0; i < rd_wide_log2; i++) + if (cfg.hard_wide_mask & (1 << i)) + hw_rd_wide_log2++; + if (hw_rd_wide_log2 < pdef.min_rd_wide_log2) + hw_rd_wide_log2 = pdef.min_rd_wide_log2; + if (hw_rd_wide_log2 > pdef.max_rd_wide_log2) + hw_rd_wide_log2 = pdef.max_rd_wide_log2; + if (pdef.width_tied) { + // For unused ports, pick max available width, + // in case narrow ports require disabling parity + // bits etc. + if (wpidx == -1 && rpidx == -1) { + hw_wr_wide_log2 = pdef.max_wr_wide_log2; + hw_rd_wide_log2 = pdef.max_rd_wide_log2; + } + } else { + if (wpidx == -1) + hw_wr_wide_log2 = pdef.max_wr_wide_log2; + if (rpidx == -1) + hw_rd_wide_log2 = pdef.max_rd_wide_log2; + } + if (cfg.def->width_mode == WidthMode::PerPort) { + for (auto cell: cells) { + if (pdef.width_tied) { + cell->setParam(stringf("\\PORT_%s_WIDTH", name), cfg.def->dbits[hw_wr_wide_log2]); + } else { + if (pdef.kind != PortKind::Ar && pdef.kind != PortKind::Sr) + cell->setParam(stringf("\\PORT_%s_WR_WIDTH", name), cfg.def->dbits[hw_wr_wide_log2]); + if (pdef.kind != PortKind::Sw) + cell->setParam(stringf("\\PORT_%s_RD_WIDTH", name), cfg.def->dbits[hw_rd_wide_log2]); + } + } + } + + // Address determination. + SigSpec hw_addr; + for (int x: hw_addr_swizzle) { + if (x == -1 || x >= GetSize(addr)) + hw_addr.append(State::S0); + else + hw_addr.append(addr[x]); + } + for (int i = 0; i < hw_wr_wide_log2 && i < hw_rd_wide_log2; i++) + hw_addr[i] = State::S0; + for (auto cell: cells) + cell->setPort(stringf("\\PORT_%s_ADDR", name), hw_addr); + + // Write part. + if (pdef.kind != PortKind::Ar && pdef.kind != PortKind::Sr) { + int width = cfg.def->dbits[hw_wr_wide_log2]; + int effective_byte = cfg.def->byte; + if (effective_byte == 0 || effective_byte > width) + effective_byte = width; + if (wpidx != -1) { + auto &wport = mem.wr_ports[wpidx]; + Swizzle port_swz = gen_swizzle(mem, cfg, wport.wide_log2, hw_wr_wide_log2); + std::vector big_wren = generate_demux(mem, wpidx, port_swz); + for (int rd = 0; rd < cfg.repl_d; rd++) { + auto cell = cells[rd]; + SigSpec hw_wdata; + SigSpec hw_wren; + for (auto &bit : port_swz.bits[rd]) { + if (!bit.valid) { + hw_wdata.append(State::Sx); + } else { + hw_wdata.append(wport.data[bit.bit]); + } + } + for (int i = 0; i < GetSize(port_swz.bits[rd]); i += effective_byte) { + auto &bit = port_swz.bits[rd][i]; + if (!bit.valid) { + hw_wren.append(State::S0); + } else { + hw_wren.append(big_wren[bit.mux_idx][bit.bit]); + } + } + cell->setPort(stringf("\\PORT_%s_WR_DATA", name), hw_wdata); + if (pdef.wrbe_separate) { + // TODO make some use of it + SigSpec en = mem.module->ReduceOr(NEW_ID, hw_wren); + cell->setPort(stringf("\\PORT_%s_WR_EN", name), en); + cell->setPort(stringf("\\PORT_%s_WR_BE", name), hw_wren); + if (cfg.def->width_mode != WidthMode::Single) + cell->setParam(stringf("\\PORT_%s_WR_BE_WIDTH", name), GetSize(hw_wren)); + } else { + cell->setPort(stringf("\\PORT_%s_WR_EN", name), hw_wren); + if (cfg.def->byte != 0 && cfg.def->width_mode != WidthMode::Single) + cell->setParam(stringf("\\PORT_%s_WR_EN_WIDTH", name), GetSize(hw_wren)); + } + } + } else { + for (auto cell: cells) { + cell->setPort(stringf("\\PORT_%s_WR_DATA", name), Const(State::Sx, width)); + SigSpec hw_wren = Const(State::S0, width / effective_byte); + if (pdef.wrbe_separate) { + cell->setPort(stringf("\\PORT_%s_WR_EN", name), State::S0); + cell->setPort(stringf("\\PORT_%s_WR_BE", name), hw_wren); + cell->setParam(stringf("\\PORT_%s_WR_BE_WIDTH", name), GetSize(hw_wren)); + } else { + cell->setPort(stringf("\\PORT_%s_WR_EN", name), hw_wren); + if (cfg.def->byte != 0) + cell->setParam(stringf("\\PORT_%s_WR_EN_WIDTH", name), GetSize(hw_wren)); + } + } + } + } + + // Read part. + if (pdef.kind != PortKind::Sw) { + int width = cfg.def->dbits[hw_rd_wide_log2]; + if (rpidx != -1) { + auto &rport = mem.rd_ports[rpidx]; + auto &rpcfg = cfg.rd_ports[rpidx]; + Swizzle port_swz = gen_swizzle(mem, cfg, rport.wide_log2, hw_rd_wide_log2); + std::vector big_rdata = generate_mux(mem, rpidx, port_swz); + for (int rd = 0; rd < cfg.repl_d; rd++) { + auto cell = cells[rd]; + if (pdef.kind == PortKind::Sr || pdef.kind == PortKind::Srsw) { + if (pdef.rd_en) + cell->setPort(stringf("\\PORT_%s_RD_EN", name), rpcfg.rd_en_to_clk_en ? State::S1 : rport.en); + if (pdef.rdarstval != ResetValKind::None) + cell->setPort(stringf("\\PORT_%s_RD_ARST", name), rport.arst); + if (pdef.rdsrstval != ResetValKind::None) + cell->setPort(stringf("\\PORT_%s_RD_SRST", name), rport.srst); + if (pdef.rdinitval == ResetValKind::Any || pdef.rdinitval == ResetValKind::NoUndef) { + Const val = rport.init_value; + if (pdef.rdarstval == ResetValKind::Init && rport.arst != State::S0) { + log_assert(val.is_fully_undef() || val == rport.arst_value); + val = rport.arst_value; + } + if (pdef.rdsrstval == ResetValKind::Init && rport.srst != State::S0) { + log_assert(val.is_fully_undef() || val == rport.srst_value); + val = rport.srst_value; + } + std::vector hw_val; + for (auto &bit : port_swz.bits[rd]) { + if (!bit.valid) { + hw_val.push_back(State::Sx); + } else { + hw_val.push_back(val.bits[bit.bit]); + } + } + if (pdef.rdinitval == ResetValKind::NoUndef) + clean_undef(hw_val); + cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), hw_val); + } + if (pdef.rdarstval == ResetValKind::Any || pdef.rdarstval == ResetValKind::NoUndef) { + std::vector hw_val; + for (auto &bit : port_swz.bits[rd]) { + if (!bit.valid) { + hw_val.push_back(State::Sx); + } else { + hw_val.push_back(rport.arst_value.bits[bit.bit]); + } + } + if (pdef.rdarstval == ResetValKind::NoUndef) + clean_undef(hw_val); + cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), hw_val); + } + if (pdef.rdsrstval == ResetValKind::Any || pdef.rdsrstval == ResetValKind::NoUndef) { + std::vector hw_val; + for (auto &bit : port_swz.bits[rd]) { + if (!bit.valid) { + hw_val.push_back(State::Sx); + } else { + hw_val.push_back(rport.srst_value.bits[bit.bit]); + } + } + if (pdef.rdsrstval == ResetValKind::NoUndef) + clean_undef(hw_val); + cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), hw_val); + } + } + SigSpec hw_rdata = mem.module->addWire(NEW_ID, width); + cell->setPort(stringf("\\PORT_%s_RD_DATA", name), hw_rdata); + SigSpec lhs; + SigSpec rhs; + for (int i = 0; i < GetSize(hw_rdata); i++) { + auto &bit = port_swz.bits[rd][i]; + if (bit.valid) { + lhs.append(big_rdata[bit.mux_idx][bit.bit]); + rhs.append(hw_rdata[i]); + } + } + mem.module->connect(lhs, rhs); + } + } else { + for (auto cell: cells) { + if (pdef.kind == PortKind::Sr || pdef.kind == PortKind::Srsw) { + if (pdef.rd_en) + cell->setPort(stringf("\\PORT_%s_RD_EN", name), State::S0); + if (pdef.rdarstval != ResetValKind::None) + cell->setPort(stringf("\\PORT_%s_RD_ARST", name), State::S0); + if (pdef.rdsrstval != ResetValKind::None) + cell->setPort(stringf("\\PORT_%s_RD_SRST", name), State::S0); + if (pdef.rdinitval == ResetValKind::Any) + cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), Const(State::Sx, width)); + else if (pdef.rdinitval == ResetValKind::NoUndef) + cell->setParam(stringf("\\PORT_%s_RD_INIT_VALUE", name), Const(State::S0, width)); + if (pdef.rdarstval == ResetValKind::Any) + cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), Const(State::Sx, width)); + else if (pdef.rdarstval == ResetValKind::NoUndef) + cell->setParam(stringf("\\PORT_%s_RD_ARST_VALUE", name), Const(State::S0, width)); + if (pdef.rdsrstval == ResetValKind::Any) + cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), Const(State::Sx, width)); + else if (pdef.rdsrstval == ResetValKind::NoUndef) + cell->setParam(stringf("\\PORT_%s_RD_SRST_VALUE", name), Const(State::S0, width)); + } + SigSpec hw_rdata = mem.module->addWire(NEW_ID, width); + cell->setPort(stringf("\\PORT_%s_RD_DATA", name), hw_rdata); + } + } + } +} + +void MemMapping::emit(const MemConfig &cfg) { + log("mapping memory %s.%s via %s\n", log_id(mem.module->name), log_id(mem.memid), log_id(cfg.def->id)); + // First, handle emulations. + if (cfg.emu_read_first) + mem.emulate_read_first(&worker.initvals); + for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) { + auto &pcfg = cfg.rd_ports[pidx]; + auto &port = mem.rd_ports[pidx]; + if (pcfg.emu_sync) + mem.extract_rdff(pidx, &worker.initvals); + else if (pcfg.emu_en) + mem.emulate_rden(pidx, &worker.initvals); + else { + if (pcfg.emu_srst_en_prio) { + if (port.ce_over_srst) + mem.emulate_rd_ce_over_srst(pidx); + else + mem.emulate_rd_srst_over_ce(pidx); + } + mem.emulate_reset(pidx, pcfg.emu_init, pcfg.emu_arst, pcfg.emu_srst, &worker.initvals); + } + } + for (int pidx = 0; pidx < GetSize(mem.wr_ports); pidx++) { + auto &pcfg = cfg.wr_ports[pidx]; + for (int opidx: pcfg.emu_prio) { + mem.emulate_priority(opidx, pidx, &worker.initvals); + } + } + for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) { + auto &port = mem.rd_ports[pidx]; + auto &pcfg = cfg.rd_ports[pidx]; + for (int opidx: pcfg.emu_trans) { + // The port may no longer be transparent due to transparency being + // nuked as part of emu_sync or emu_prio. + if (port.transparency_mask[opidx]) + mem.emulate_transparency(opidx, pidx, &worker.initvals); + } + } + + // tgt (repl, port group, port) -> mem (wr port, rd port), where -1 means no port. + std::vector>>> ports(cfg.repl_port); + for (int i = 0; i < cfg.repl_port; i++) + ports[i].resize(cfg.def->port_groups.size()); + for (int i = 0; i < GetSize(cfg.wr_ports); i++) { + auto &pcfg = cfg.wr_ports[i]; + for (int j = 0; j < cfg.repl_port; j++) { + if (j == 0) { + ports[j][pcfg.port_group].push_back({i, pcfg.rd_port}); + } else { + ports[j][pcfg.port_group].push_back({i, -1}); + } + } + } + for (int i = 0; i < GetSize(cfg.rd_ports); i++) { + auto &pcfg = cfg.rd_ports[i]; + if (pcfg.wr_port != -1) + continue; + auto &pg = cfg.def->port_groups[pcfg.port_group]; + int j = 0; + while (GetSize(ports[j][pcfg.port_group]) >= GetSize(pg.names)) + j++; + ports[j][pcfg.port_group].push_back({-1, i}); + } + + Swizzle init_swz = gen_swizzle(mem, cfg, 0, GetSize(cfg.def->dbits) - 1); + Const init_data = mem.get_init_data(); + + std::vector hw_addr_swizzle; + for (int i = 0; i < cfg.base_width_log2; i++) + hw_addr_swizzle.push_back(-1); + for (int i = 0; i < init_swz.addr_shift; i++) + if (!(cfg.emu_wide_mask & 1 << i)) + hw_addr_swizzle.push_back(i); + log_assert(GetSize(hw_addr_swizzle) == cfg.def->abits); + + for (int rp = 0; rp < cfg.repl_port; rp++) { + std::vector cells; + for (int rd = 0; rd < cfg.repl_d; rd++) { + Cell *cell = mem.module->addCell(stringf("%s.%d.%d", mem.memid.c_str(), rp, rd), cfg.def->id); + if (cfg.def->width_mode == WidthMode::Global) + cell->setParam(ID::WIDTH, cfg.def->dbits[cfg.base_width_log2]); + if (cfg.def->widthscale) { + std::vector val; + for (auto &bit: init_swz.bits[rd]) + val.push_back(bit.valid ? State::S1 : State::S0); + cell->setParam(ID::BITS_USED, val); + } + for (auto &it: cfg.def->options) + cell->setParam(stringf("\\OPTION_%s", it.first.c_str()), it.second); + for (int i = 0; i < GetSize(cfg.def->shared_clocks); i++) { + auto &cdef = cfg.def->shared_clocks[i]; + auto &ccfg = cfg.shared_clocks[i]; + if (cdef.anyedge) { + cell->setParam(stringf("\\CLK_%s_POL", cdef.name.c_str()), ccfg.used ? ccfg.polarity : true); + cell->setPort(stringf("\\CLK_%s", cdef.name.c_str()), ccfg.used ? ccfg.clk : State::S0); + } else { + SigSpec sig = ccfg.used ? ccfg.clk : State::S0; + if (ccfg.used && ccfg.invert) + sig = mem.module->Not(NEW_ID, sig); + cell->setPort(stringf("\\CLK_%s", cdef.name.c_str()), sig); + } + } + if (cfg.def->init == MemoryInitKind::Any || cfg.def->init == MemoryInitKind::NoUndef) { + std::vector initval; + for (int hwa = 0; hwa < (1 << cfg.def->abits); hwa += 1 << (GetSize(cfg.def->dbits) - 1)) { + for (auto &bit: init_swz.bits[rd]) { + if (!bit.valid) { + initval.push_back(State::Sx); + } else { + int addr = bit.addr; + for (int i = GetSize(cfg.def->dbits) - 1; i < cfg.def->abits; i++) + if (hwa & 1 << i) + addr += 1 << hw_addr_swizzle[i]; + if (addr >= mem.start_offset && addr < mem.start_offset + mem.size) + initval.push_back(init_data.bits[(addr - mem.start_offset) * mem.width + bit.bit]); + else + initval.push_back(State::Sx); + } + } + } + if (cfg.def->init == MemoryInitKind::NoUndef) + clean_undef(initval); + cell->setParam(ID::INIT, initval); + } + cells.push_back(cell); + } + for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) { + auto &pg = cfg.def->port_groups[pgi]; + for (int pi = 0; pi < GetSize(pg.names); pi++) { + bool used = pi < GetSize(ports[rp][pgi]); + bool used_r = false; + bool used_w = false; + if (used) { + auto &pd = ports[rp][pgi][pi]; + const PortVariant *pdef; + if (pd.first != -1) + pdef = cfg.wr_ports[pd.first].def; + else + pdef = cfg.rd_ports[pd.second].def; + used_w = pd.first != -1; + used_r = pd.second != -1; + emit_port(cfg, cells, *pdef, pg.names[pi].c_str(), pd.first, pd.second, hw_addr_swizzle); + } else { + emit_port(cfg, cells, pg.variants[0], pg.names[pi].c_str(), -1, -1, hw_addr_swizzle); + } + if (pg.optional) + for (auto cell: cells) + cell->setParam(stringf("\\PORT_%s_USED", pg.names[pi].c_str()), used); + if (pg.optional_rw) + for (auto cell: cells) { + cell->setParam(stringf("\\PORT_%s_RD_USED", pg.names[pi].c_str()), used_r); + cell->setParam(stringf("\\PORT_%s_WR_USED", pg.names[pi].c_str()), used_w); + } + } + } + } + mem.remove(); +} + +struct MemoryLibMapPass : public Pass { + MemoryLibMapPass() : Pass("memory_libmap", "map memories to cells") { } + void help() override + { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" memory_libmap -lib [-D ] [selection]\n"); + log("\n"); + log("This pass takes a description of available RAM cell types and maps\n"); + log("all selected memories to one of them, or leaves them to be mapped to FFs.\n"); + log("\n"); + log(" -lib \n"); + log(" Selects a library file containing RAM cell definitions. This option\n"); + log(" can be passed more than once to select multiple libraries.\n"); + log(" See passes/memory/memlib.md for description of the library format.\n"); + log("\n"); + log(" -D \n"); + log(" Enables a condition that can be checked within the library file\n"); + log(" to eg. select between slightly different hardware variants.\n"); + log(" This option can be passed any number of times.\n"); + log("\n"); + log(" -logic-cost-rom \n"); + log(" -logic-cost-ram \n"); + log(" Sets the cost of a single bit for memory lowered to soft logic.\n"); + log("\n"); + log(" -no-auto-distributed\n"); + log(" -no-auto-block\n"); + log(" -no-auto-huge\n"); + log(" Disables automatic mapping of given kind of RAMs. Manual mapping\n"); + log(" (using ram_style or other attributes) is still supported.\n"); + log("\n"); + } + void execute(std::vector args, RTLIL::Design *design) override + { + std::vector lib_files; + pool defines; + PassOptions opts; + opts.no_auto_distributed = false; + opts.no_auto_block = false; + opts.no_auto_huge = false; + opts.logic_cost_ram = 1.0; + opts.logic_cost_rom = 1.0/16.0; + log_header(design, "Executing MEMORY_LIBMAP pass (mapping memories to cells).\n"); + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-lib" && argidx+1 < args.size()) { + lib_files.push_back(args[++argidx]); + continue; + } + if (args[argidx] == "-D" && argidx+1 < args.size()) { + defines.insert(args[++argidx]); + continue; + } + if (args[argidx] == "-no-auto-distributed") { + opts.no_auto_distributed = true; + continue; + } + if (args[argidx] == "-no-auto-block") { + opts.no_auto_block = true; + continue; + } + if (args[argidx] == "-no-auto-huge") { + opts.no_auto_huge = true; + continue; + } + if (args[argidx] == "-logic-cost-rom" && argidx+1 < args.size()) { + opts.logic_cost_rom = strtod(args[++argidx].c_str(), nullptr); + continue; + } + if (args[argidx] == "-logic-cost-ram" && argidx+1 < args.size()) { + opts.logic_cost_ram = strtod(args[++argidx].c_str(), nullptr); + continue; + } + break; + } + extra_args(args, argidx, design); + + Library lib = parse_library(lib_files, defines); + + for (auto module : design->selected_modules()) { + MapWorker worker(module); + auto mems = Mem::get_selected_memories(module); + for (auto &mem : mems) + { + MemMapping map(worker, mem, lib, opts); + int idx = -1; + int best = map.logic_cost; + if (!map.logic_ok) { + if (map.cfgs.empty()) + log_error("no valid mapping found for memory %s.%s\n", log_id(module->name), log_id(mem.memid)); + idx = 0; + best = map.cfgs[0].cost; + } + for (int i = 0; i < GetSize(map.cfgs); i++) { + if (map.cfgs[i].cost < best) { + idx = i; + best = map.cfgs[i].cost; + } + } + if (idx == -1) { + log("using FF mapping for memory %s.%s\n", log_id(module->name), log_id(mem.memid)); + } else { + map.emit(map.cfgs[idx]); + } + } + } + } +} MemoryLibMapPass; + +PRIVATE_NAMESPACE_END -- 2.30.2