From 83cde2d02ba06bbd4014858983ac324bf44cb6c6 Mon Sep 17 00:00:00 2001 From: Dan Ravensloft Date: Sat, 23 May 2020 12:52:13 +0100 Subject: [PATCH] intel_alm: ABC9 sequential optimisations --- techlibs/intel_alm/Makefile.inc | 3 ++ techlibs/intel_alm/common/abc9_map.v | 18 +++++++++ techlibs/intel_alm/common/abc9_model.v | 55 ++++++++++++++++++++++++++ techlibs/intel_alm/common/abc9_unmap.v | 11 ++++++ techlibs/intel_alm/common/dff_sim.v | 42 +++++++++++++++----- techlibs/intel_alm/common/mem_sim.v | 10 +++++ techlibs/intel_alm/synth_intel_alm.cc | 29 +++++++++----- 7 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 techlibs/intel_alm/common/abc9_map.v create mode 100644 techlibs/intel_alm/common/abc9_model.v create mode 100644 techlibs/intel_alm/common/abc9_unmap.v diff --git a/techlibs/intel_alm/Makefile.inc b/techlibs/intel_alm/Makefile.inc index ed6c4510b..477be6353 100644 --- a/techlibs/intel_alm/Makefile.inc +++ b/techlibs/intel_alm/Makefile.inc @@ -2,6 +2,9 @@ OBJS += techlibs/intel_alm/synth_intel_alm.o # Techmap +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/abc9_map.v)) +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/abc9_unmap.v)) +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/abc9_model.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/alm_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/alm_sim.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/arith_alm_map.v)) diff --git a/techlibs/intel_alm/common/abc9_map.v b/techlibs/intel_alm/common/abc9_map.v new file mode 100644 index 000000000..32ad79bdc --- /dev/null +++ b/techlibs/intel_alm/common/abc9_map.v @@ -0,0 +1,18 @@ +// This file exists to map purely-synchronous flops to ABC9 flops, while +// mapping flops with asynchronous-clear as boxes, this is because ABC9 +// doesn't support asynchronous-clear flops in sequential synthesis. + +module MISTRAL_FF( + input DATAIN, CLK, ACLR, ENA, SCLR, SLOAD, SDATA, + output reg Q +); + +parameter _TECHMAP_CONSTMSK_ACLR_ = 1'b0; + +// If the async-clear is constant, we assume it's disabled. +if (_TECHMAP_CONSTMSK_ACLR_ != 1'b0) + MISTRAL_FF_SYNCONLY _TECHMAP_REPLACE_ (.DATAIN(DATAIN), .CLK(CLK), .ENA(ENA), .SCLR(SCLR), .SLOAD(SLOAD), .SDATA(SDATA), .Q(Q)); +else + wire _TECHMAP_FAIL_ = 1; + +endmodule diff --git a/techlibs/intel_alm/common/abc9_model.v b/techlibs/intel_alm/common/abc9_model.v new file mode 100644 index 000000000..dd46147a5 --- /dev/null +++ b/techlibs/intel_alm/common/abc9_model.v @@ -0,0 +1,55 @@ +`ifdef cyclonev +`define SYNCPATH 262 +`define SYNCSETUP 522 +`define COMBPATH 0 +`endif +`ifdef cyclone10gx +`define SYNCPATH 219 +`define SYNCSETUP 268 +`define COMBPATH 0 +`endif + +// fallback for when a family isn't detected (e.g. when techmapping for equivalence) +`ifndef SYNCPATH +`define SYNCPATH 0 +`define SYNCSETUP 0 +`define COMBPATH 0 +`endif + +// This is a purely-synchronous flop, that ABC9 can use for sequential synthesis. +(* abc9_flop, lib_whitebox *) +module MISTRAL_FF_SYNCONLY( + input DATAIN, CLK, ENA, SCLR, SLOAD, SDATA, + output reg Q +); + +specify + if (ENA) (posedge CLK => (Q : DATAIN)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SCLR)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SLOAD)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SDATA)) = `SYNCPATH; + + $setup(DATAIN, posedge CLK, `SYNCSETUP); + $setup(ENA, posedge CLK, `SYNCSETUP); + $setup(SCLR, posedge CLK, `SYNCSETUP); + $setup(SLOAD, posedge CLK, `SYNCSETUP); + $setup(SDATA, posedge CLK, `SYNCSETUP); +endspecify + +initial begin + // Altera flops initialise to zero. + Q = 0; +end + +always @(posedge CLK) begin + // Clock-enable + if (ENA) begin + // Synchronous clear + if (SCLR) Q <= 0; + // Synchronous load + else if (SLOAD) Q <= SDATA; + else Q <= DATAIN; + end +end + +endmodule diff --git a/techlibs/intel_alm/common/abc9_unmap.v b/techlibs/intel_alm/common/abc9_unmap.v new file mode 100644 index 000000000..0eda69560 --- /dev/null +++ b/techlibs/intel_alm/common/abc9_unmap.v @@ -0,0 +1,11 @@ +// After performing sequential synthesis, map the synchronous flops back to +// standard MISTRAL_FF flops. + +module MISTRAL_FF_SYNCONLY( + input DATAIN, CLK, ENA, SCLR, SLOAD, SDATA, + output reg Q +); + +MISTRAL_FF _TECHMAP_REPLACE_ (.DATAIN(DATAIN), .CLK(CLK), .ACLR(1'b1), .ENA(ENA), .SCLR(SCLR), .SLOAD(SLOAD), .SDATA(SDATA), .Q(Q)); + +endmodule diff --git a/techlibs/intel_alm/common/dff_sim.v b/techlibs/intel_alm/common/dff_sim.v index 32444dd46..94aa37fb5 100644 --- a/techlibs/intel_alm/common/dff_sim.v +++ b/techlibs/intel_alm/common/dff_sim.v @@ -53,23 +53,45 @@ // Q: data output // // Note: the DFFEAS primitive is mostly emulated; it does not reflect what the hardware implements. + +`ifdef cyclonev +`define SYNCPATH 262 +`define SYNCSETUP 522 +`define COMBPATH 0 +`endif +`ifdef cyclone10gx +`define SYNCPATH 219 +`define SYNCSETUP 268 +`define COMBPATH 0 +`endif + +// fallback for when a family isn't detected (e.g. when techmapping for equivalence) +`ifndef SYNCPATH +`define SYNCPATH 0 +`define SYNCSETUP 0 +`define COMBPATH 0 +`endif + +(* abc9_box, lib_whitebox *) module MISTRAL_FF( input DATAIN, CLK, ACLR, ENA, SCLR, SLOAD, SDATA, output reg Q ); -`ifdef cyclonev -specify - (posedge CLK => (Q : DATAIN)) = 262; - $setup(DATAIN, posedge CLK, 522); -endspecify -`endif -`ifdef cyclone10gx specify - (posedge CLK => (Q : DATAIN)) = 219; - $setup(DATAIN, posedge CLK, 268); + if (ENA) (posedge CLK => (Q : DATAIN)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SCLR)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SLOAD)) = `SYNCPATH; + if (ENA) (posedge CLK => (Q : SDATA)) = `SYNCPATH; + + $setup(DATAIN, posedge CLK, `SYNCSETUP); + $setup(ENA, posedge CLK, `SYNCSETUP); + $setup(SCLR, posedge CLK, `SYNCSETUP); + $setup(SLOAD, posedge CLK, `SYNCSETUP); + $setup(SDATA, posedge CLK, `SYNCSETUP); + + (ACLR => Q) = `COMBPATH; endspecify -`endif initial begin // Altera flops initialise to zero. diff --git a/techlibs/intel_alm/common/mem_sim.v b/techlibs/intel_alm/common/mem_sim.v index ae79b19a4..f6f9ecb02 100644 --- a/techlibs/intel_alm/common/mem_sim.v +++ b/techlibs/intel_alm/common/mem_sim.v @@ -48,10 +48,20 @@ // the following model because it's very difficult to trigger this in practice // as clock cycles will be much longer than any potential blip of 'x, so the // model can be treated as always returning a defined result. + +(* abc9_box, lib_whitebox *) module MISTRAL_MLAB(input [4:0] A1ADDR, input A1DATA, A1EN, CLK1, input [4:0] B1ADDR, output B1DATA); reg [31:0] mem = 32'b0; +// TODO +specify + $setup(A1ADDR, posedge CLK1, 0); + $setup(A1DATA, posedge CLK1, 0); + + (B1ADDR *> B1DATA) = 0; +endspecify + always @(posedge CLK1) if (A1EN) mem[A1ADDR] <= A1DATA; diff --git a/techlibs/intel_alm/synth_intel_alm.cc b/techlibs/intel_alm/synth_intel_alm.cc index fabfc9003..982857dd5 100644 --- a/techlibs/intel_alm/synth_intel_alm.cc +++ b/techlibs/intel_alm/synth_intel_alm.cc @@ -38,20 +38,26 @@ struct SynthIntelALMPass : public ScriptPass { log("This command runs synthesis for ALM-based Intel FPGAs.\n"); log("\n"); log(" -top \n"); - log(" use the specified module as top module (default='top')\n"); + log(" use the specified module as top module\n"); log("\n"); log(" -family \n"); log(" target one of:\n"); log(" \"cyclonev\" - Cyclone V (default)\n"); log(" \"cyclone10gx\" - Cyclone 10GX\n"); log("\n"); - log(" -quartus\n"); - log(" output a netlist using Quartus cells instead of MISTRAL_* cells\n"); - log("\n"); log(" -vqm \n"); log(" write the design to the specified Verilog Quartus Mapping File. Writing of an\n"); log(" output file is omitted if this parameter is not specified. Implies -quartus.\n"); log("\n"); + log(" -noflatten\n"); + log(" do not flatten design before synthesis; useful for per-module area statistics\n"); + log("\n"); + log(" -quartus\n"); + log(" output a netlist using Quartus cells instead of MISTRAL_* cells\n"); + log("\n"); + log(" -dff\n"); + log(" pass DFFs to ABC to perform sequential logic optimisations (EXPERIMENTAL)\n"); + log("\n"); log(" -run :\n"); log(" only run the commands between the labels (see below). an empty\n"); log(" from label is synonymous to 'begin', and empty to label is\n"); @@ -63,16 +69,13 @@ struct SynthIntelALMPass : public ScriptPass { log(" -nobram\n"); log(" do not use block RAM cells in output netlist\n"); log("\n"); - log(" -noflatten\n"); - log(" do not flatten design before synthesis\n"); - log("\n"); log("The following commands are executed by this synthesis command:\n"); help_script(); log("\n"); } string top_opt, family_opt, bram_type, vout_file; - bool flatten, quartus, nolutram, nobram; + bool flatten, quartus, nolutram, nobram, dff; void clear_flags() override { @@ -84,6 +87,7 @@ struct SynthIntelALMPass : public ScriptPass { quartus = false; nolutram = false; nobram = false; + dff = false; } void execute(std::vector args, RTLIL::Design *design) override @@ -130,6 +134,10 @@ struct SynthIntelALMPass : public ScriptPass { flatten = false; continue; } + if (args[argidx] == "-dff") { + dff = true; + continue; + } break; } extra_args(args, argidx, design); @@ -165,6 +173,7 @@ struct SynthIntelALMPass : public ScriptPass { run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/alm_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/dff_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/mem_sim.v", family_opt.c_str())); + run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/abc9_model.v", family_opt.c_str())); // Misc and common cells run("read_verilog -lib +/intel/common/altpll_bb.v"); @@ -209,7 +218,9 @@ struct SynthIntelALMPass : public ScriptPass { } if (check_label("map_luts")) { - run("abc9 -maxlut 6 -W 200"); + run("techmap -map +/intel_alm/common/abc9_map.v"); + run(stringf("abc9 %s -maxlut 6 -W 200", help_mode ? "[-dff]" : dff ? "-dff" : "")); + run("techmap -map +/intel_alm/common/abc9_unmap.v"); run("techmap -map +/intel_alm/common/alm_map.v"); run("opt -fast"); run("autoname"); -- 2.30.2