xilinx_dsp: Initial DSP48A/DSP48A1 support.
authorMarcin Kościelnicki <mwk@0x04.net>
Sun, 22 Dec 2019 19:43:39 +0000 (20:43 +0100)
committerMarcin Kościelnicki <mwk@0x04.net>
Sun, 22 Dec 2019 19:51:14 +0000 (20:51 +0100)
passes/pmgen/Makefile.inc
passes/pmgen/xilinx_dsp.cc
passes/pmgen/xilinx_dsp48a.pmg [new file with mode: 0644]
passes/pmgen/xilinx_dsp_CREG.pmg
techlibs/xilinx/synth_xilinx.cc
techlibs/xilinx/xc3sda_dsp_map.v
techlibs/xilinx/xc6s_dsp_map.v
tests/arch/xilinx/macc.sh
tests/arch/xilinx/mul.ys
tests/arch/xilinx/mul_unsigned.ys

index 145d2ebf989a0957f2aa700764c220a40dda2eba..1a57bef7d944f2aa0ffd08d6f0668462daf443b8 100644 (file)
@@ -22,8 +22,9 @@ $(eval $(call add_extra_objs,passes/pmgen/ice40_wrapcarry_pm.h))
 # --------------------------------------
 
 OBJS += passes/pmgen/xilinx_dsp.o
-passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h
+passes/pmgen/xilinx_dsp.o: passes/pmgen/xilinx_dsp_pm.h passes/pmgen/xilinx_dsp48a_pm.h passes/pmgen/xilinx_dsp_CREG_pm.h passes/pmgen/xilinx_dsp_cascade_pm.h
 $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_pm.h))
+$(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp48a_pm.h))
 $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_CREG_pm.h))
 $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h))
 
index 054e123e49ab7486e76870b2aa77aa77c448e575..81c3c57c4bcb54746705f9aed58ab271b65c5a79 100644 (file)
@@ -26,6 +26,7 @@ USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
 
 #include "passes/pmgen/xilinx_dsp_pm.h"
+#include "passes/pmgen/xilinx_dsp48a_pm.h"
 #include "passes/pmgen/xilinx_dsp_CREG_pm.h"
 #include "passes/pmgen/xilinx_dsp_cascade_pm.h"
 
@@ -487,6 +488,190 @@ void xilinx_dsp_pack(xilinx_dsp_pm &pm)
        pm.blacklist(cell);
 }
 
+void xilinx_dsp48a_pack(xilinx_dsp48a_pm &pm)
+{
+       auto &st = pm.st_xilinx_dsp48a_pack;
+
+       log("Analysing %s.%s for Xilinx DSP48A/DSP48A1 packing.\n", log_id(pm.module), log_id(st.dsp));
+
+       log_debug("preAdd:     %s\n", log_id(st.preAdd, "--"));
+       log_debug("ffA1:       %s %s %s\n", log_id(st.ffA1, "--"), log_id(st.ffA1cemux, "--"), log_id(st.ffA1rstmux, "--"));
+       log_debug("ffA0:       %s %s %s\n", log_id(st.ffA0, "--"), log_id(st.ffA0cemux, "--"), log_id(st.ffA0rstmux, "--"));
+       log_debug("ffB1:       %s %s %s\n", log_id(st.ffB1, "--"), log_id(st.ffB1cemux, "--"), log_id(st.ffB1rstmux, "--"));
+       log_debug("ffB0:       %s %s %s\n", log_id(st.ffB0, "--"), log_id(st.ffB0cemux, "--"), log_id(st.ffB0rstmux, "--"));
+       log_debug("ffD:        %s %s %s\n", log_id(st.ffD, "--"), log_id(st.ffDcemux, "--"), log_id(st.ffDrstmux, "--"));
+       log_debug("dsp:        %s\n", log_id(st.dsp, "--"));
+       log_debug("ffM:        %s %s %s\n", log_id(st.ffM, "--"), log_id(st.ffMcemux, "--"), log_id(st.ffMrstmux, "--"));
+       log_debug("postAdd:    %s\n", log_id(st.postAdd, "--"));
+       log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--"));
+       log_debug("ffP:        %s %s %s\n", log_id(st.ffP, "--"), log_id(st.ffPcemux, "--"), log_id(st.ffPrstmux, "--"));
+
+       Cell *cell = st.dsp;
+       SigSpec &opmode = cell->connections_.at(ID(OPMODE)); // reference: bit writes below edit the cell's OPMODE connection in place
+
+       if (st.preAdd) {
+               log("  preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type));
+               bool D_SIGNED = st.preAdd->getParam(ID(A_SIGNED)).as_bool();
+               bool B_SIGNED = st.preAdd->getParam(ID(B_SIGNED)).as_bool();
+               st.sigB.extend_u0(18, B_SIGNED);
+               st.sigD.extend_u0(18, D_SIGNED);
+               cell->setPort(ID(B), st.sigB);
+               cell->setPort(ID(D), st.sigD);
+               opmode[4] = State::S1; // pre-adder now in use (the matcher only offers a preAdd while OPMODE[4] is 0)
+               if (st.preAdd->type == ID($add))
+                       opmode[6] = State::S0;
+               else if (st.preAdd->type == ID($sub))
+                       opmode[6] = State::S1;
+               else
+                       log_assert(!"strange pre-adder type");
+
+               pm.autoremove(st.preAdd);
+       }
+       if (st.postAdd) {
+               log("  postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type));
+
+               if (st.postAddMux) {
+                       log_assert(st.ffP); // the matcher only searches for postAddMux once ffP has been found
+                       opmode[2] = st.postAddMux->getPort(ID(S));
+                       pm.autoremove(st.postAddMux);
+               }
+               else if (st.ffP && st.sigC == st.sigP) // adder's other operand is the PREG output itself (accumulator)
+                       opmode[2] = State::S0;
+               else
+                       opmode[2] = State::S1;
+               opmode[3] = State::S1;
+
+               if (opmode[2] != State::S0) { // the C port is left alone in the constant-0 (pure accumulator) case
+                       if (st.postAddMuxAB == ID(A))
+                               st.sigC.extend_u0(48, st.postAdd->getParam(ID(B_SIGNED)).as_bool());
+                       else
+                               st.sigC.extend_u0(48, st.postAdd->getParam(ID(A_SIGNED)).as_bool());
+                       cell->setPort(ID(C), st.sigC);
+               }
+
+               pm.autoremove(st.postAdd);
+       }
+
+       if (st.clock != SigBit())
+       {
+               cell->setPort(ID(CLK), st.clock);
+
+               auto f = [&pm,cell](SigSpec &A, Cell* ff, Cell* cemux, bool cepol, IdString ceport, Cell* rstmux, bool rstpol, IdString rstport) { // folds one matched flop (plus optional CE/reset muxes) into the DSP cell
+                       SigSpec D = ff->getPort(ID(D));
+                       SigSpec Q = pm.sigmap(ff->getPort(ID(Q)));
+                       if (!A.empty())
+                               A.replace(Q, D); // bypass the flop: take the signal from its D side instead of Q
+                       if (rstmux) {
+                               SigSpec Y = rstmux->getPort(ID(Y));
+                               SigSpec AB = rstmux->getPort(rstpol ? ID(A) : ID(B));
+                               if (!A.empty())
+                                       A.replace(Y, AB);
+                               if (rstport != IdString()) {
+                                       SigSpec S = rstmux->getPort(ID(S));
+                                       cell->setPort(rstport, rstpol ? S : pm.module->Not(NEW_ID, S)); // invert the select when rstpol is false
+                               }
+                       }
+                       else if (rstport != IdString())
+                               cell->setPort(rstport, State::S0); // no reset mux: reset port tied to constant 0
+                       if (cemux) {
+                               SigSpec Y = cemux->getPort(ID(Y));
+                               SigSpec BA = cemux->getPort(cepol ? ID(B) : ID(A));
+                               SigSpec S = cemux->getPort(ID(S));
+                               if (!A.empty())
+                                       A.replace(Y, BA);
+                               cell->setPort(ceport, cepol ? S : pm.module->Not(NEW_ID, S));
+                       }
+                       else
+                               cell->setPort(ceport, State::S1); // no enable mux: enable port tied to constant 1
+
+                       for (auto c : Q.chunks()) {
+                               auto it = c.wire->attributes.find(ID(init));
+                               if (it == c.wire->attributes.end())
+                                       continue;
+                               for (int i = c.offset; i < c.offset+c.width; i++) {
+                                       log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
+                                       it->second[i] = State::Sx; // drop the init value of each absorbed Q bit
+                               }
+                       }
+               };
+
+               if (st.ffA0 || st.ffA1) {
+                       SigSpec A = cell->getPort(ID(A));
+                       if (st.ffA1) {
+                               f(A, st.ffA1, st.ffA1cemux, st.ffAcepol, ID(CEA), st.ffA1rstmux, st.ffArstpol, ID(RSTA));
+                               cell->setParam(ID(A1REG), 1);
+                       }
+                       if (st.ffA0) {
+                               f(A, st.ffA0, st.ffA0cemux, st.ffAcepol, ID(CEA), st.ffA0rstmux, st.ffArstpol, ID(RSTA));
+                               cell->setParam(ID(A0REG), 1);
+                       }
+                       pm.add_siguser(A, cell);
+                       cell->setPort(ID(A), A);
+               }
+               if (st.ffB0 || st.ffB1) {
+                       SigSpec B = cell->getPort(ID(B));
+                       if (st.ffB1) {
+                               f(B, st.ffB1, st.ffB1cemux, st.ffBcepol, ID(CEB), st.ffB1rstmux, st.ffBrstpol, ID(RSTB));
+                               cell->setParam(ID(B1REG), 1);
+                       }
+                       if (st.ffB0) {
+                               f(B, st.ffB0, st.ffB0cemux, st.ffBcepol, ID(CEB), st.ffB0rstmux, st.ffBrstpol, ID(RSTB));
+                               cell->setParam(ID(B0REG), 1);
+                       }
+                       pm.add_siguser(B, cell);
+                       cell->setPort(ID(B), B);
+               }
+               if (st.ffD) {
+                       SigSpec D = cell->getPort(ID(D));
+                       f(D, st.ffD, st.ffDcemux, st.ffDcepol, ID(CED), st.ffDrstmux, st.ffDrstpol, ID(RSTD));
+                       pm.add_siguser(D, cell);
+                       cell->setPort(ID(D), D);
+                       cell->setParam(ID(DREG), 1);
+               }
+               if (st.ffM) {
+                       SigSpec M; // deliberately empty: f() skips its signal rewiring when given an empty SigSpec
+                       f(M, st.ffM, st.ffMcemux, st.ffMcepol, ID(CEM), st.ffMrstmux, st.ffMrstpol, ID(RSTM));
+                       st.ffM->connections_.at(ID(Q)).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM))); // repoint the flop's Q at a fresh wire so it no longer drives sigM
+                       cell->setParam(ID(MREG), State::S1);
+               }
+               if (st.ffP) {
+                       SigSpec P; // deliberately empty: f() skips its signal rewiring when given an empty SigSpec
+                       f(P, st.ffP, st.ffPcemux, st.ffPcepol, ID(CEP), st.ffPrstmux, st.ffPrstpol, ID(RSTP));
+                       st.ffP->connections_.at(ID(Q)).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP))); // repoint the flop's Q at a fresh wire so it no longer drives sigP
+                       cell->setParam(ID(PREG), State::S1);
+               }
+
+               log("  clock: %s (%s)", log_signal(st.clock), "posedge");
+
+               if (st.ffA0)
+                       log(" ffA0:%s", log_id(st.ffA0));
+               if (st.ffA1)
+                       log(" ffA1:%s", log_id(st.ffA1));
+
+               if (st.ffB0)
+                       log(" ffB0:%s", log_id(st.ffB0));
+               if (st.ffB1)
+                       log(" ffB1:%s", log_id(st.ffB1));
+
+               if (st.ffD)
+                       log(" ffD:%s", log_id(st.ffD));
+
+               if (st.ffM)
+                       log(" ffM:%s", log_id(st.ffM));
+
+               if (st.ffP)
+                       log(" ffP:%s", log_id(st.ffP));
+       }
+       log("\n");
+
+       SigSpec P = st.sigP;
+       if (GetSize(P) < 48)
+               P.append(pm.module->addWire(NEW_ID, 48-GetSize(P))); // pad P up to 48 bits with a fresh wire
+       cell->setPort(ID(P), P);
+
+       pm.blacklist(cell);
+}
+
 void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm)
 {
        auto &st = pm.st_xilinx_dsp_packC;
@@ -592,33 +777,48 @@ struct XilinxDspPass : public Pass {
                log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n");
                log("into using the DSP48E1's pattern detector feature for overflow detection.\n");
                log("\n");
+               log("    -family {xcup|xcu|xc7|xc6v|xc5v|xc4v|xc6s|xc3sda}\n");
+               log("        select the family to target\n");
+               log("        default: xc7\n");
+               log("\n");
        }
        void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
        {
                log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n");
 
+               std::string family = "xc7";
                size_t argidx;
                for (argidx = 1; argidx < args.size(); argidx++)
                {
-                       // if (args[argidx] == "-singleton") {
-                       //      singleton_mode = true;
-                       //      continue;
-                       // }
+                       if ((args[argidx] == "-family" || args[argidx] == "-arch") && argidx+1 < args.size()) {
+                               family = args[++argidx];
+                               continue;
+                       }
                        break;
                }
                extra_args(args, argidx, design);
 
+               // Don't bother distinguishing between those.
+               if (family == "xc6v")
+                       family = "xc7";
+               if (family == "xcup")
+                       family = "xcu";
+
                for (auto module : design->selected_modules()) {
                        // Experimental feature: pack $add/$sub cells with
                        //   (* use_dsp48="simd" *) into DSP48E1's using its
                        //   SIMD feature
-                       xilinx_simd_pack(module, module->selected_cells());
+                       if (family == "xc7")
+                               xilinx_simd_pack(module, module->selected_cells());
 
                        // Match for all features ([ABDMP][12]?REG, pre-adder,
                        // post-adder, pattern detector, etc.) except for CREG
-                       {
+                       if (family == "xc7") {
                                xilinx_dsp_pm pm(module, module->selected_cells());
                                pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
+                       } else if (family == "xc6s" || family == "xc3sda") {
+                               xilinx_dsp48a_pm pm(module, module->selected_cells());
+                               pm.run_xilinx_dsp48a_pack(xilinx_dsp48a_pack);
                        }
                        // Separating out CREG packing is necessary since there
                        //   is no guarantee that the cell ordering corresponds
diff --git a/passes/pmgen/xilinx_dsp48a.pmg b/passes/pmgen/xilinx_dsp48a.pmg
new file mode 100644 (file)
index 0000000..97d5c5c
--- /dev/null
@@ -0,0 +1,673 @@
+// This file describes the main pattern matcher setup (of three total) that
+//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc — version for
+//   DSP48A/DSP48A1 (Spartan 3A DSP, Spartan 6).
+// At a high level, it works as follows:
+//   ( 1) Starting from a DSP48A/DSP48A1 cell
+//   ( 2) Match the driver of the 'B' input to a possible $dff cell (B1REG)
+//        (attached to at most two $mux cells that implement clock-enable or
+//         reset functionality, using a subpattern discussed below)
+//        If B1REG matched, treat 'B' input as input of B1REG
+//   ( 3) Match the driver of the 'B' and 'D' inputs for a possible $add cell
+//       (pre-adder)
+//   ( 4) Match 'B' input for B0REG
+//   ( 5) Match 'A' input for A1REG
+//        If A1REG, then match 'A' input for A0REG
+//   ( 6) Match 'D' input for DREG
+//   ( 7) Match 'P' output that exclusively drives an MREG
+//   ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
+//        cell (post-adder).
+//        The other input to the adder is assumed to come in from the 'C' input
+//        (note: 'P' -> 'C' connections that exist for accumulators are
+//         recognised in xilinx_dsp.cc).
+//   ( 9) Match 'P' output that exclusively drives a PREG
+//   (10) If post-adder and PREG both present, match for a $mux cell driving
+//        the 'C' input, where one of the $mux's inputs is the PREG output.
+//        This indicates an accumulator situation, and one where a $mux exists
+//        to override the accumulated value:
+//             +--------------------------------+
+//             |   ____                         |
+//             +--|    \                        |
+//                |$mux|-+                      |
+//         'C' ---|____/ |                      |
+//                       | /-------\   +----+   |
+//            +----+     +-| post- |___|PREG|---+ 'P'
+//            |MREG|------ | adder |   +----+
+//            +----+       \-------/
+// Notes: see the notes in xilinx_dsp.pmg
+
+pattern xilinx_dsp48a_pack
+
+state <SigBit> clock
+state <SigSpec> sigA sigB sigC sigD sigM sigP
+state <IdString> postAddAB postAddMuxAB
+state <bool> ffAcepol ffBcepol ffDcepol ffMcepol ffPcepol
+state <bool> ffArstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
+state <Cell*> ffA0 ffA0cemux ffA0rstmux ffA1 ffA1cemux ffA1rstmux
+state <Cell*> ffB0 ffB0cemux ffB0rstmux ffB1 ffB1cemux ffB1rstmux
+state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
+
+// Variables used for subpatterns
+state <SigSpec> argQ argD
+state <bool> ffcepol ffrstpol
+state <int> ffoffset
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock
+udata <Cell*> dff dffcemux dffrstmux
+udata <bool> dffcepol dffrstpol
+
+// (1) Starting from a DSP48A/DSP48A1 cell
+match dsp
+       select dsp->type.in(\DSP48A, \DSP48A1)
+endmatch
+
+code sigA sigB sigC sigD sigM clock
+       auto unextend = [](const SigSpec &sig) {
+               int i;
+               for (i = GetSize(sig)-1; i > 0; i--)
+                       if (sig[i] != sig[i-1])
+                               break;
+               // Do not remove non-const sign bit
+               if (sig[i].wire)
+                       ++i;
+               return sig.extract(0, i);
+       };
+       sigA = unextend(port(dsp, \A));
+       sigB = unextend(port(dsp, \B));
+
+       sigC = port(dsp, \C, SigSpec());
+       sigD = port(dsp, \D, SigSpec());
+
+       SigSpec P = port(dsp, \P);
+       // Only care about those bits that are used
+       int i;
+       for (i = GetSize(P)-1; i >= 0; i--)
+               if (nusers(P[i]) > 1)
+                       break;
+       i++;
+       log_assert(nusers(P.extract_end(i)) <= 1);
+       // This sigM could have no users if a downstream sink (e.g. $add) is
+       //   narrower than the $mul result, for example
+       if (i == 0)
+               reject;
+       sigM = P.extract(0, i);
+
+       clock = port(dsp, \CLK, SigBit());
+endcode
+
+// (2) Match the driver of the 'B' input to a possible $dff cell (B1REG)
+//     (attached to at most two $mux cells that implement clock-enable or
+//      reset functionality, using the in_dffe subpattern defined below)
+//     If matched, treat 'B' input as input of B1REG
+code argQ ffB1 ffB1cemux ffB1rstmux ffBcepol ffBrstpol sigB clock
+       if (param(dsp, \B1REG).as_int() == 0 && param(dsp, \B0REG).as_int() == 0 && port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()) {
+               argQ = sigB;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffB1 = dff;
+                       clock = dffclock;
+                       if (dffrstmux) {
+                               ffB1rstmux = dffrstmux;
+                               ffBrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffB1cemux = dffcemux;
+                               ffBcepol = dffcepol;
+                       }
+                       sigB = dffD;
+               }
+       }
+endcode
+
+// (3) Match the driver of the 'B' and 'D' inputs for a possible $add cell
+//     (pre-adder)
+match preAdd
+       if sigD.empty() || sigD.is_fully_zero()
+       if param(dsp, \B0REG).as_int() == 0
+       // Ensure that preAdder not already used
+       if port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()
+
+       select preAdd->type.in($add, $sub)
+       // Output has to be 18 bits or less
+       select GetSize(port(preAdd, \Y)) <= 18
+       select nusers(port(preAdd, \Y)) == 2
+       // D port has to be 18 bits or less
+       select GetSize(port(preAdd, \A)) <= 18
+       // B port has to be 18 bits or less
+       select GetSize(port(preAdd, \B)) <= 18
+       index <SigSpec> port(preAdd, \Y) === sigB
+
+       optional
+endmatch
+
+code sigB sigD
+       if (preAdd) {
+               sigD = port(preAdd, \A);
+               sigB = port(preAdd, \B);
+       }
+endcode
+
+// (4) Match 'B' input for B0REG
+code argQ ffB0 ffB0cemux ffB0rstmux ffBcepol ffBrstpol sigB clock
+       if (param(dsp, \B0REG).as_int() == 0) {
+               argQ = sigB;
+               subpattern(in_dffe);
+               if (dff) {
+                       if (ffB1) {
+                               if ((ffB1rstmux != nullptr) ^ (dffrstmux != nullptr))
+                                       goto ffB0_end;
+                               if ((ffB1cemux != nullptr) ^ (dffcemux != nullptr))
+                                       goto ffB0_end;
+                               if (dffrstmux) {
+                                       if (ffBrstpol != dffrstpol)
+                                               goto ffB0_end;
+                                       if (port(ffB1rstmux, \S) != port(dffrstmux, \S))
+                                               goto ffB0_end;
+                                       ffB0rstmux = dffrstmux;
+                               }
+                               if (dffcemux) {
+                                       if (ffBcepol != dffcepol)
+                                               goto ffB0_end;
+                                       if (port(ffB1cemux, \S) != port(dffcemux, \S))
+                                               goto ffB0_end;
+                                       ffB0cemux = dffcemux;
+                               }
+                       }
+                       ffB0 = dff;
+                       clock = dffclock;
+                       if (dffrstmux) {
+                               ffB0rstmux = dffrstmux;
+                               ffBrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffB0cemux = dffcemux;
+                               ffBcepol = dffcepol;
+                       }
+                       sigB = dffD;
+               }
+       }
+ffB0_end:
+endcode
+
+// (5) Match 'A' input for A1REG
+//     If A1REG, then match 'A' input for A0REG
+code argQ ffA1 ffA1cemux ffA1rstmux ffAcepol ffArstpol sigA clock ffA0 ffA0cemux ffA0rstmux
+       if (param(dsp, \A0REG).as_int() == 0 && param(dsp, \A1REG).as_int() == 0) {
+               argQ = sigA;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffA1 = dff;
+                       clock = dffclock;
+                       if (dffrstmux) {
+                               ffA1rstmux = dffrstmux;
+                               ffArstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffA1cemux = dffcemux;
+                               ffAcepol = dffcepol;
+                       }
+                       sigA = dffD;
+
+                       // Now attempt to match A0
+                       if (ffA1) {
+                               argQ = sigA;
+                               subpattern(in_dffe);
+                               if (dff) {
+                                       if ((ffA1rstmux != nullptr) ^ (dffrstmux != nullptr))
+                                               goto ffA0_end;
+                                       if ((ffA1cemux != nullptr) ^ (dffcemux != nullptr))
+                                               goto ffA0_end;
+                                       if (dffrstmux) {
+                                               if (ffArstpol != dffrstpol)
+                                                       goto ffA0_end;
+                                               if (port(ffA1rstmux, \S) != port(dffrstmux, \S))
+                                                       goto ffA0_end;
+                                               ffA0rstmux = dffrstmux;
+                                       }
+                                       if (dffcemux) {
+                                               if (ffAcepol != dffcepol)
+                                                       goto ffA0_end;
+                                               if (port(ffA1cemux, \S) != port(dffcemux, \S))
+                                                       goto ffA0_end;
+                                               ffA0cemux = dffcemux;
+                                       }
+
+                                       ffA0 = dff;
+                                       clock = dffclock;
+
+                                       if (dffcemux) {
+                                               ffA0cemux = dffcemux;
+                                               ffAcepol = dffcepol;
+                                       }
+                                       sigA = dffD;
+
+ffA0_end:                              ;
+                               }
+                       }
+
+               }
+       }
+endcode
+
+// (6) Match 'D' input for DREG
+code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
+       if (param(dsp, \DREG).as_int() == 0) {
+               argQ = sigD;
+               subpattern(in_dffe);
+               if (dff) {
+                       ffD = dff;
+                       clock = dffclock;
+                       if (dffrstmux) {
+                               ffDrstmux = dffrstmux;
+                               ffDrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffDcemux = dffcemux;
+                               ffDcepol = dffcepol;
+                       }
+                       sigD = dffD;
+               }
+       }
+endcode
+
+// (7) Match 'P' output that exclusively drives an MREG
+code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
+       if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
+               argD = sigM;
+               subpattern(out_dffe);
+               if (dff) {
+                       ffM = dff;
+                       clock = dffclock;
+                       if (dffrstmux) {
+                               ffMrstmux = dffrstmux;
+                               ffMrstpol = dffrstpol;
+                       }
+                       if (dffcemux) {
+                               ffMcemux = dffcemux;
+                               ffMcepol = dffcepol;
+                       }
+                       sigM = dffQ;
+               }
+       }
+       sigP = sigM;
+endcode
+
+// (8) Match 'P' output that exclusively drives one of two inputs to an $add
+//     cell (post-adder).
+//     The other input to the adder is assumed to come in from the 'C' input
+//     (note: 'P' -> 'C' connections that exist for accumulators are
+//      recognised in xilinx_dsp.cc).
+match postAdd
+       // Ensure that Z mux is not already used
+       if port(dsp, \OPMODE, SigSpec()).extract(2,2).is_fully_zero()
+
+       select postAdd->type.in($add)
+       select GetSize(port(postAdd, \Y)) <= 48
+       choice <IdString> AB {\A, \B}
+       select nusers(port(postAdd, AB)) <= 3
+       filter ffMcemux || nusers(port(postAdd, AB)) == 2
+       filter !ffMcemux || nusers(port(postAdd, AB)) == 3
+
+       index <SigBit> port(postAdd, AB)[0] === sigP[0]
+       filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
+       filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
+       // Check that remainder of AB is a sign- or zero-extension
+       filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+
+       set postAddAB AB
+       optional
+endmatch
+
+code sigC sigP
+       if (postAdd) {
+               sigC = port(postAdd, postAddAB == \A ? \B : \A);
+               sigP = port(postAdd, \Y);
+       }
+endcode
+
+// (9) Match 'P' output that exclusively drives a PREG
+code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
+       if (param(dsp, \PREG).as_int() == 0) {
+               int users = 2;
+               // With ffMcemux and no postAdd, the new-value net must have three users: ffMcemux, ffM and ffPcemux
+               if (ffMcemux && !postAdd) users++;
+               if (nusers(sigP) == users) {
+                       argD = sigP;
+                       subpattern(out_dffe);
+                       if (dff) {
+                               ffP = dff;
+                               clock = dffclock;
+                               if (dffrstmux) {
+                                       ffPrstmux = dffrstmux;
+                                       ffPrstpol = dffrstpol;
+                               }
+                               if (dffcemux) {
+                                       ffPcemux = dffcemux;
+                                       ffPcepol = dffcepol;
+                               }
+                               sigP = dffQ;
+                       }
+               }
+       }
+endcode
+
+// (10) If post-adder and PREG both present, match for a $mux cell driving
+//      the 'C' input, where one of the $mux's inputs is the PREG output.
+//      This indicates an accumulator situation, and one where a $mux exists
+//      to override the accumulated value:
+//           +--------------------------------+
+//           |   ____                         |
+//           +--|    \                        |
+//              |$mux|-+                      |
+//       'C' ---|____/ |                      |
+//                     | /-------\   +----+   |
+//          +----+     +-| post- |___|PREG|---+ 'P'
+//          |MREG|------ | adder |   +----+
+//          +----+       \-------/
+match postAddMux
+       if postAdd
+       if ffP
+       select postAddMux->type.in($mux)
+       select nusers(port(postAddMux, \Y)) == 2
+       choice <IdString> AB {\A, \B}
+       index <SigSpec> port(postAddMux, AB) === sigP
+       index <SigSpec> port(postAddMux, \Y) === sigC
+       set postAddMuxAB AB
+       optional
+endmatch
+
+code sigC
+       if (postAddMux)
+               sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
+endcode
+
+code
+       accept;
+endcode
+
+// #######################
+
+// Subpattern for matching against input registers, based on knowledge of the
+//   'Q' input. Typically, identifying registers with clock-enable and reset
+//   capability would be a task handled by other Yosys passes such as
+//   dff2dffe, but since DSP inference happens much before this, these patterns
+//   have to be manually identified.
+// At a high level:
+//   (1) Starting from a $dff cell that (partially or fully) drives the given
+//       'Q' argument
+//   (2) Match for a $mux cell implementing synchronous reset semantics ---
+//       one that exclusively drives the 'D' input of the $dff, with one of its
+//       $mux inputs being fully zero
+//   (3) Match for a $mux cell implementing clock enable semantics --- one that
+//       exclusively drives the 'D' input of the $dff (or the other input of
+//       the reset $mux) and where one of this $mux's inputs is connected to
+//       the 'Q' output of the $dff
+subpattern in_dffe
+arg argD argQ clock
+
+code
+       // No register is reported until the code after the 'ff' match sets it
+       dff = nullptr;
+       // Nothing to match against an empty 'Q'
+       if (argQ.empty())
+               reject;
+       for (const auto &chunk : argQ.chunks()) {
+               // Give up if any part of 'Q' is a constant, or sits on a wire
+               //   carrying the keep attribute
+               if (!chunk.wire || chunk.wire->get_bool_attribute(\keep))
+                       reject;
+               // Give up if any bit of 'Q' has an init value other than
+               //   zero or undefined
+               Const init = chunk.wire->attributes.at(\init, Const());
+               if (!init.empty())
+                       for (auto bit : init.extract(chunk.offset, chunk.width))
+                               if (!(bit == State::S0 || bit == State::Sx))
+                                       reject;
+       }
+endcode
+
+// (1) Starting from a $dff cell that (partially or fully) drives the given
+//     'Q' argument
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
+
+       // 'offset' is the bit position within ff.Q at which the first bit of
+       //   argQ is found
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+       // Check that the rest of argQ is present
+       filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
+       filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+       // If a clock was passed in (not a default SigBit), the flop must use it
+       filter clock == SigBit() || port(ff, \CLK) == clock
+
+       // Record where argQ starts within the flop
+       set ffoffset offset
+endmatch
+
+code argQ argD
+       // Full-width 'D' and 'Q' ports of the matched flop
+       const SigSpec ffD = port(ff, \D);
+       const SigSpec ffQ = port(ff, \Q);
+       dff = ff;
+       dffclock = port(ff, \CLK);
+       // Translate the requested 'Q' bits into the flop's matching 'D' bits
+       dffD = argQ;
+       dffD.replace(ffQ, ffD);
+       // From here on both arguments refer to the flop's complete ports
+       argQ = ffQ;
+       argD = ffD;
+       // Looking for ffrstmux is only worthwhile when dffD has no more than
+       //   two users (ff, ffrstmux); an empty argD disables the later matches
+       if (nusers(dffD) > 2)
+               argD = SigSpec();
+endcode
+
+// (2) Match for a $mux cell implementing synchronous reset semantics ---
+//     exclusively drives the 'D' input of the $dff, with one of the $mux
+//     inputs being fully zero
+match ffrstmux
+       // Not attempted when the preceding code block cleared argD
+       if !argD.empty()
+       select ffrstmux->type.in($mux)
+       // The mux output must be argD (the flop's 'D' port at this point)
+       index <SigSpec> port(ffrstmux, \Y) === argD
+
+       // BA names the mux input that is tied to zero
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
+
+       // ffrstpol is true when the zero input is \B
+       define <bool> pol (BA == \B)
+       set ffrstpol pol
+       // The following code block handles the case of ffrstmux being null,
+       //   i.e. no reset mux was found
+       semioptional
+endmatch
+
+code argD
+       // Remember the reset mux (nullptr when none was matched)
+       dffrstmux = ffrstmux;
+       if (ffrstmux) {
+               dffrstpol = ffrstpol;
+               // Step past the mux: continue with the data input other than
+               //   the all-zero one
+               argD = port(ffrstmux, ffrstpol ? \A : \B);
+               dffD.replace(port(ffrstmux, \Y), argD);
+
+               // Looking for ffcemux is only worthwhile when argQ has at
+               //   least 3 users (ff, <upstream>, ffrstmux) and dffD has
+               //   exactly two (ff, ffrstmux)
+               if (nusers(argQ) < 3 || nusers(dffD) != 2)
+                       argD = SigSpec();
+       }
+endcode
+
+// (3) Match for a $mux cell implementing clock enable semantics --- one that
+//     exclusively drives the 'D' input of the $dff (or the other input of
+//     the reset $mux) and where one of this $mux's inputs is connected to
+//     the 'Q' output of the $dff
+match ffcemux
+       // Not attempted when an earlier code block cleared argD
+       if !argD.empty()
+       select ffcemux->type.in($mux)
+       // The mux output must be argD (the flop's 'D' port, or the data input
+       //   of the reset mux if one was matched)
+       index <SigSpec> port(ffcemux, \Y) === argD
+       // AB names the mux input that is fed back from the flop's 'Q'
+       choice <IdString> AB {\A, \B}
+       index <SigSpec> port(ffcemux, AB) === argQ
+       // ffcepol is true when the feedback input is \A
+       define <bool> pol (AB == \A)
+       set ffcepol pol
+       // The following code block handles the case of ffcemux being null,
+       //   i.e. no clock-enable mux was found
+       semioptional
+endmatch
+
+code argD
+       // Remember the clock-enable mux (nullptr when none was matched)
+       dffcemux = ffcemux;
+       if (ffcemux) {
+               dffcepol = ffcepol;
+               // Step past the mux: continue with the data input that is not
+               //   fed back from 'Q'
+               argD = port(ffcemux, ffcepol ? \B : \A);
+               dffD.replace(port(ffcemux, \Y), argD);
+       }
+endcode
+
+// #######################
+
+// Subpattern for matching against output registers, based on knowledge of the
+//   'D' input.
+// At a high level:
+//   (1) Starting from an optional $mux cell that implements clock enable
+//       semantics --- one where the given 'D' argument (partially or fully)
+//       drives one of its two inputs
+//   (2) Starting from, or continuing onto, another optional $mux cell that
+//       implements synchronous reset semantics --- one where the given 'D'
+//       argument (or the clock enable $mux output) drives one of its two inputs
+//       and where the other input is fully zero
+//   (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
+//       output of the previous clock enable or reset $mux cells)
+subpattern out_dffe
+arg argD argQ clock
+
+code
+       // No register is reported until the code after the 'ff' match sets it
+       dff = nullptr;
+       // Nothing to match against an empty 'D' (the matches below index
+       //   argD[0])
+       if (GetSize(argD) == 0)
+               reject;
+       for (const auto &c : argD.chunks()) {
+               // Abandon matches when 'D' is a constant (there is no wire
+               //   whose attributes could be inspected)
+               if (!c.wire)
+                       reject;
+               // Abandon matches when 'D' has the keep attribute set
+               if (c.wire->get_bool_attribute(\keep))
+                       reject;
+       }
+endcode
+
+// (1) Starting from an optional $mux cell that implements clock enable
+//     semantics --- one where the given 'D' argument (partially or fully)
+//     drives one of its two inputs
+match ffcemux
+       select ffcemux->type.in($mux)
+       // ffcemux output must have two users: ffcemux and ff.D
+       select nusers(port(ffcemux, \Y)) == 2
+
+       // AB names the mux input that holds the last value
+       choice <IdString> AB {\A, \B}
+       // keep-last-value net must have at least three users: ffcemux, ff, downstream sink(s)
+       select nusers(port(ffcemux, AB)) >= 3
+
+       // BA is the opposite mux input; 'offset' is the bit position within it
+       //   at which the first bit of argD is found
+       slice offset GetSize(port(ffcemux, \Y))
+       define <IdString> BA (AB == \A ? \B : \A)
+       index <SigBit> port(ffcemux, BA)[offset] === argD[0]
+
+       // Check that the rest of argD is present
+       filter GetSize(port(ffcemux, BA)) >= offset + GetSize(argD)
+       filter port(ffcemux, BA).extract(offset, GetSize(argD)) == argD
+
+       // Record the offset for the consistency checks in later matches;
+       //   ffcepol is true when the keep-last-value input is \A
+       set ffoffset offset
+       define <bool> pol (AB == \A)
+       set ffcepol pol
+
+       // The following code block handles the case of ffcemux being null,
+       //   i.e. no clock-enable mux was found
+       semioptional
+endmatch
+
+code argD argQ
+       if (ffcemux) {
+               // Mux input carrying argD, and the opposite (hold) input
+               const SigSpec muxData = port(ffcemux, ffcepol ? \B : \A);
+               const SigSpec muxHold = port(ffcemux, ffcepol ? \A : \B);
+               // argQ: the bits of argD mapped onto the hold input
+               argQ = argD;
+               argQ.replace(muxData, muxHold);
+               // argD: advance from the mux data input to the mux output
+               argD.replace(muxData, port(ffcemux, \Y));
+
+               dffcepol = ffcepol;
+       }
+       // nullptr when no clock-enable mux was matched
+       dffcemux = ffcemux;
+endcode
+
+// (2) Starting from, or continuing onto, another optional $mux cell that
+//     implements synchronous reset semantics --- one where the given 'D'
+//     argument (or the clock enable $mux output) drives one of its two inputs
+//     and where the other input is fully zero
+match ffrstmux
+       select ffrstmux->type.in($mux)
+       // ffrstmux output must have two users: ffrstmux and ff.D
+       select nusers(port(ffrstmux, \Y)) == 2
+
+       // BA names the mux input that is tied to zero
+       choice <IdString> BA {\B, \A}
+       // DSP48E1 only supports reset to zero
+       select port(ffrstmux, BA).is_fully_zero()
+
+       // AB is the opposite (data) input; 'offset' is the bit position within
+       //   it at which the first bit of argD is found
+       slice offset GetSize(port(ffrstmux, \Y))
+       define <IdString> AB (BA == \B ? \A : \B)
+       index <SigBit> port(ffrstmux, AB)[offset] === argD[0]
+
+       // Check that offset is consistent
+       filter !ffcemux || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(port(ffrstmux, AB)) >= offset + GetSize(argD)
+       filter port(ffrstmux, AB).extract(offset, GetSize(argD)) == argD
+
+       // Record the offset for the consistency check in the 'ff' match;
+       //   ffrstpol is true when the data input is \A (zero input is \B)
+       set ffoffset offset
+       define <bool> pol (AB == \A)
+       set ffrstpol pol
+
+       // The following code block handles the case of ffrstmux being null,
+       //   i.e. no reset mux was found
+       semioptional
+endmatch
+
+code argD argQ
+       if (ffrstmux) {
+               // Advance argD from the mux data input to the mux output
+               const SigSpec muxData = port(ffrstmux, ffrstpol ? \A : \B);
+               argD.replace(muxData, port(ffrstmux, \Y));
+
+               dffrstpol = ffrstpol;
+       }
+       // nullptr when no reset mux was matched
+       dffrstmux = ffrstmux;
+endcode
+
+// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
+//     output of the previous clock enable or reset $mux cells)
+match ff
+       select ff->type.in($dff)
+       // DSP48E1 does not support clock inversion
+       select param(ff, \CLK_POLARITY).as_bool()
+
+       // 'offset' is the bit position within ff.D at which the first bit of
+       //   argD is found
+       slice offset GetSize(port(ff, \D))
+       index <SigBit> port(ff, \D)[offset] === argD[0]
+
+       // Check that offset is consistent
+       filter (!ffcemux && !ffrstmux) || ffoffset == offset
+       // Check that the rest of argD is present
+       filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
+       filter port(ff, \D).extract(offset, GetSize(argD)) == argD
+       // Check that FF.Q is connected to CE-mux
+       filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
+
+       // If a clock was passed in (not a default SigBit), the flop must use it
+       filter clock == SigBit() || port(ff, \CLK) == clock
+
+       // Record where argD starts within the flop
+       set ffoffset offset
+endmatch
+
+code argQ
+       // Without a clock-enable mux argQ has not been derived yet: map the
+       //   matched 'D' bits onto the flop's 'Q' bits
+       if (!ffcemux) {
+               argQ = argD;
+               argQ.replace(port(ff, \D), port(ff, \Q));
+       }
+
+       // Give up if any bit of 'Q' has an init value other than zero or
+       //   undefined
+       for (const auto &chunk : argQ.chunks()) {
+               Const init = chunk.wire->attributes.at(\init, Const());
+               if (!init.empty())
+                       for (auto bit : init.extract(chunk.offset, chunk.width))
+                               if (!(bit == State::S0 || bit == State::Sx))
+                                       reject;
+       }
+
+       // Publish the result only after every check above has passed
+       dff = ff;
+       dffQ = argQ;
+       dffclock = port(ff, \CLK);
+endcode
index 5cd34162e2b34b1bf40710f135a93eeb1b7cfeea..b20e4f458f004e9d9c430ab3622a6719a1becbfe 100644 (file)
@@ -1,7 +1,7 @@
 // This file describes the second of three pattern matcher setups that
 //   forms the `xilinx_dsp` pass described in xilinx_dsp.cc
 // At a high level, it works as follows:
-//   (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
+//   (1) Starting from a DSP48* cell that (a) doesn't have a CREG already,
 //       and (b) uses the 'C' port
 //   (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
 //       (attached to at most two $mux cells that implement clock-enable or
@@ -38,10 +38,10 @@ udata <SigBit> dffclock
 udata <Cell*> dff dffcemux dffrstmux
 udata <bool> dffcepol dffrstpol
 
-// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
+// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already,
 //     and (b) uses the 'C' port
 match dsp
-       select dsp->type.in(\DSP48E1)
+       select dsp->type.in(\DSP48A, \DSP48A1, \DSP48E1)
        select param(dsp, \CREG, 1).as_int() == 0
        select nusers(port(dsp, \C, SigSpec())) > 1
 endmatch
@@ -60,7 +60,8 @@ code sigC sigP clock
        sigC = unextend(port(dsp, \C, SigSpec()));
 
        SigSpec P = port(dsp, \P);
-       if (param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") {
+       if (!dsp->type.in(\DSP48E1) ||
+ param(dsp, \USE_MULT, Const("MULTIPLY")).decode_string() == "MULTIPLY") {
                // Only care about those bits that are used
                int i;
                for (i = GetSize(P)-1; i >= 0; i--)
index 971089b289a385230718a3ff02de4d14560a78a0..a1904691198f6227b120d6ad7646be1e33c08c0f 100644 (file)
@@ -387,7 +387,10 @@ struct SynthXilinxPass : public ScriptPass
                                run("opt_expr -fine");
                                run("wreduce");
                                run("select -clear");
-                               run("xilinx_dsp");
+                               if (help_mode)
+                                       run("xilinx_dsp -family <family>");
+                               else
+                                       run("xilinx_dsp -family " + family);
                                run("chtype -set $mul t:$__soft_mul");
                        }
                }
index 87348a1737c7551c9a7ec36b67097f6d11a6c153..258f903955aeb9dcc5fc626f214078c9ba409a96 100644 (file)
@@ -27,7 +27,7 @@ module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] Y);
                .D(18'b0),
                .P(P_48),
 
-               .OPMODE(8'b0000010)
+               .OPMODE(8'b0000001)
        );
        assign Y = P_48;
 endmodule
index e8705723b0f45a69a35d21b3c8bb4cb2b2d5d018..bdce60c149ba9a0a7f755de053660722923dbaed 100644 (file)
@@ -27,7 +27,7 @@ module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] Y);
                .D(18'b0),
                .P(P_48),
 
-               .OPMODE(8'b0000010)
+               .OPMODE(8'b0000001)
        );
        assign Y = P_48;
 endmodule
index 154a29848e7a5f880c1bfa4284b7121fa66299c2..58b97b6465d616c9d7a29e325f5dad32c4db0fce 100644 (file)
@@ -1,3 +1,6 @@
 ../../../yosys -qp "synth_xilinx -top macc2; rename -top macc2_uut" -o macc_uut.v macc.v
 iverilog -o test_macc macc_tb.v macc_uut.v macc.v ../../../techlibs/xilinx/cells_sim.v
 vvp -N ./test_macc
+../../../yosys -qp "synth_xilinx -family xc6s -top macc2; rename -top macc2_uut" -o macc_uut.v macc.v
+iverilog -o test_macc macc_tb.v macc_uut.v macc.v ../../../techlibs/xilinx/cells_sim.v
+vvp -N ./test_macc
index d7681496660f36887d92442a45407ab9a100d4e7..6cf994fbf77d326b6843f1e8d2cad30b050568f6 100644 (file)
@@ -7,3 +7,15 @@ cd top # Constrain all select calls below inside the top module
 
 select -assert-count 1 t:DSP48E1
 select -assert-none t:DSP48E1 %% t:* %D
+
+design -reset
+
+read_verilog ../common/mul.v
+hierarchy -top top
+proc
+equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc6s # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd top # Constrain all select calls below inside the top module
+
+select -assert-count 1 t:DSP48A1
+select -assert-none t:DSP48A1 %% t:* %D
index 62495b90cf423839a4adf375bf4252e6fcd29fb9..c714680afc35df5494a55e16907ddf961b7d1d51 100644 (file)
@@ -9,3 +9,17 @@ select -assert-count 1 t:BUFG
 select -assert-count 1 t:DSP48E1
 select -assert-count 30 t:FDRE
 select -assert-none t:DSP48E1 t:FDRE t:BUFG %% t:* %D
+
+design -reset
+
+read_verilog mul_unsigned.v
+hierarchy -top mul_unsigned
+proc
+
+equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -family xc6s # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd mul_unsigned # Constrain all select calls below inside the top module
+select -assert-count 1 t:BUFG
+select -assert-count 1 t:DSP48A1
+select -assert-count 30 t:FDRE
+select -assert-none t:DSP48A1 t:FDRE t:BUFG %% t:* %D