From 95665730540c0fd7c76690f28d0fd6b5f13f2223 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 14 Jun 2019 12:02:12 +0100 Subject: [PATCH] ecp5: Add abc9 option Signed-off-by: David Shah --- backends/aiger/xaiger.cc | 3 +- passes/techmap/abc9.cc | 9 +++ techlibs/ecp5/Makefile.inc | 3 + techlibs/ecp5/abc_5g.box | 36 ++++++++++ techlibs/ecp5/abc_5g.lut | 25 +++++++ techlibs/ecp5/cells_map.v | 133 ++++++++++++++++++++++-------------- techlibs/ecp5/cells_sim.v | 32 ++++++--- techlibs/ecp5/synth_ecp5.cc | 25 +++++-- 8 files changed, 194 insertions(+), 72 deletions(-) create mode 100644 techlibs/ecp5/abc_5g.box create mode 100644 techlibs/ecp5/abc_5g.lut diff --git a/backends/aiger/xaiger.cc b/backends/aiger/xaiger.cc index fcf9a7bf1..5fa20c9c2 100644 --- a/backends/aiger/xaiger.cc +++ b/backends/aiger/xaiger.cc @@ -833,8 +833,7 @@ struct XAigerWriter Pass::call(holes_module->design, "flatten -wb"); // TODO: Should techmap all lib_whitebox-es once - //Pass::call(holes_module->design, "techmap"); - + Pass::call(holes_module->design, "techmap"); Pass::call(holes_module->design, "aigmap"); Pass::call(holes_module->design, "clean -purge"); diff --git a/passes/techmap/abc9.cc b/passes/techmap/abc9.cc index a6ec4a6fb..d4f5c5238 100644 --- a/passes/techmap/abc9.cc +++ b/passes/techmap/abc9.cc @@ -22,7 +22,16 @@ // Berkeley Logic Synthesis and Verification Group, ABC: A System for Sequential Synthesis and Verification // http://www.eecs.berkeley.edu/~alanmi/abc/ +#if 0 +// Based on &flow3 - better QoR but more experimental +#define ABC_COMMAND_LUT "&st; &ps -l; "/*"&sweep -v;"*/" &scorr; " \ + "&st; &if {W}; &save; &st; &syn2; &if {W}; &save; &load; "\ + "&st; &if -g -K 6; &dch -f; &if {W}; &save; &load; "\ + "&st; &if -g -K 6; &synch2; &if {W}; &save; &load" +#else #define ABC_COMMAND_LUT "&st; &sweep; &scorr; "/*"&dc2; "*/"&retime; &dch -f; &ps -l; &if {W} -v; "/*"&mfs; "*/"&ps -l" +#endif + #define ABC_FAST_COMMAND_LUT "&st; &retime; &if {W}" diff --git a/techlibs/ecp5/Makefile.inc b/techlibs/ecp5/Makefile.inc index 4db087e87..eee3b418f 100644 --- a/techlibs/ecp5/Makefile.inc +++ b/techlibs/ecp5/Makefile.inc @@ -11,6 +11,9 @@ $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/bram.txt)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/arith_map.v)) $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/latches_map.v)) +$(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_5g.box)) +$(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_5g.lut)) + EXTRA_OBJS += techlibs/ecp5/brams_init.mk techlibs/ecp5/brams_connect.mk .SECONDARY: techlibs/ecp5/brams_init.mk techlibs/ecp5/brams_connect.mk diff --git a/techlibs/ecp5/abc_5g.box b/techlibs/ecp5/abc_5g.box new file mode 100644 index 000000000..72af6d9cb --- /dev/null +++ b/techlibs/ecp5/abc_5g.box @@ -0,0 +1,36 @@ +# Box 1 : CCU2C (2xCARRY + 2xLUT4) +# Outputs: S0, S1, COUT +# name ID w/b ins outs +CCU2C 1 1 9 3 + +#A0 A1 B0 B1 C0 C1 D0 D1 CIN +379 - 379 - 275 - 141 - 257 +630 379 630 379 526 275 392 141 273 +516 516 516 516 412 412 278 278 43 + +# Box 2 : TRELLIS_DPR16X4 (16x4 dist ram) +# Outputs: DO0, DO1, DO2, DO3, DO4 +# name ID w/b ins outs +TRELLIS_DPR16X4 2 0 14 4 + +#DI0 DI1 DI2 DI3 RAD0 RAD1 RAD2 RAD3 WAD0 WAD1 WAD2 WAD3 WCK WRE +- - - - 141 379 275 379 - - - - - - +- - - - 141 379 275 379 - - - - - - +- - - - 141 379 275 379 - - - - - - +- - - - 141 379 275 379 - - - - - - + +# Box 3 : PFUMX (MUX2) +# Outputs: Z +# name ID w/b ins outs +PFUMX 3 1 3 1 + +#ALUT BLUT C0 +98 98 151 + +# Box 4 : L6MUX21 (MUX2) +# Outputs: Z +# name ID w/b ins outs +L6MUX21 4 1 3 1 + +#D0 D1 SD +140 141 148 diff --git a/techlibs/ecp5/abc_5g.lut b/techlibs/ecp5/abc_5g.lut new file mode 100644 index 000000000..e8aa9b35d --- /dev/null +++ b/techlibs/ecp5/abc_5g.lut @@ -0,0 +1,25 @@ +# ECP5-5G LUT library for ABC +# Note that ECP5 architecture assigns difference +# in LUT input delay to interconnect, so this is +# considered too + + +# Simple LUTs +# area D C B A +1 1 141 +2 1 141 275 +3 1 141 275 379 +4 1 141 275 379 379 + +# LUT5 = 2x LUT4 + PFUMX +# area M0 D C B A +5 2 151 239 373 477 477 + +# LUT6 = 2x LUT5 + MUX2 +# area M1 M0 D C B A +6 4 148 292 380 514 618 618 + +# LUT7 = 2x LUT6 + MUX2 +# area M2 M1 M0 D C B A +7 8 148 289 433 521 655 759 759 + diff --git a/techlibs/ecp5/cells_map.v b/techlibs/ecp5/cells_map.v index f6c71a03d..53a89e8a3 100644 --- a/techlibs/ecp5/cells_map.v +++ b/techlibs/ecp5/cells_map.v @@ -70,80 +70,107 @@ module \$lut (A, Y); parameter WIDTH = 0; parameter LUT = 0; + + // Need to swap input ordering, and fix init accordingly, + // to match ABC's expectation of LUT inputs in non-decreasing + // delay order + localparam P_WIDTH = WIDTH < 4 ? 4 : WIDTH; + function [P_WIDTH-1:0] permute_index; + input [P_WIDTH-1:0] i; + integer j; + begin + permute_index = 0; + for (j = 0; j < P_WIDTH; j = j + 1) + permute_index[P_WIDTH-1 - j] = i[j]; + end + endfunction + + function [2**P_WIDTH-1:0] permute_init; + input [2**P_WIDTH-1:0] orig; + integer i; + begin + permute_init = 0; + for (i = 0; i < 2**P_WIDTH; i = i + 1) + permute_init[i] = orig[permute_index(i)]; + end + endfunction + + parameter [2**P_WIDTH-1:0] P_LUT = permute_init(LUT); + input [WIDTH-1:0] A; output Y; generate if (WIDTH == 1) begin - LUT4 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.Z(Y), - .A(A[0]), .B(1'b0), .C(1'b0), .D(1'b0)); + LUT4 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.Z(Y), + .A(1'b0), .B(1'b0), .C(1'b0), .D(A[0])); end else if (WIDTH == 2) begin - LUT4 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.Z(Y), - .A(A[0]), .B(A[1]), .C(1'b0), .D(1'b0)); + LUT4 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.Z(Y), + .A(1'b0), .B(1'b0), .C(A[1]), .D(A[0])); end else if (WIDTH == 3) begin - LUT4 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.Z(Y), - .A(A[0]), .B(A[1]), .C(A[2]), .D(1'b0)); + LUT4 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.Z(Y), + .A(1'b0), .B(A[2]), .C(A[1]), .D(A[0])); end else if (WIDTH == 4) begin - LUT4 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.Z(Y), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); + LUT4 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.Z(Y), + .A(A[3]), .B(A[2]), .C(A[1]), .D(A[0])); `ifndef NO_PFUMUX end else if (WIDTH == 5) begin wire f0, f1; - LUT4 #(.INIT(LUT[15: 0])) lut0 (.Z(f0), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[31:16])) lut1 (.Z(f1), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - PFUMX mux5(.ALUT(f1), .BLUT(f0), .C0(A[4]), .Z(Y)); + LUT4 #(.INIT(P_LUT[15: 0])) lut0 (.Z(f0), + .A(A[4]), .B(A[3]), .C(A[2]), .D(A[1])); + LUT4 #(.INIT(P_LUT[31:16])) lut1 (.Z(f1), + .A(A[4]), .B(A[3]), .C(A[2]), .D(A[1])); + PFUMX mux5(.ALUT(f1), .BLUT(f0), .C0(A[0]), .Z(Y)); end else if (WIDTH == 6) begin wire f0, f1, f2, f3, g0, g1; - LUT4 #(.INIT(LUT[15: 0])) lut0 (.Z(f0), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[31:16])) lut1 (.Z(f1), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - LUT4 #(.INIT(LUT[47:32])) lut2 (.Z(f2), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[63:48])) lut3 (.Z(f3), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - PFUMX mux50(.ALUT(f1), .BLUT(f0), .C0(A[4]), .Z(g0)); - PFUMX mux51(.ALUT(f3), .BLUT(f2), .C0(A[4]), .Z(g1)); - L6MUX21 mux6 (.D0(g0), .D1(g1), .SD(A[5]), .Z(Y)); + LUT4 #(.INIT(P_LUT[15: 0])) lut0 (.Z(f0), + .A(A[5]), .B(A[4]), .C(A[3]), .D(A[2])); + LUT4 #(.INIT(P_LUT[31:16])) lut1 (.Z(f1), + .A(A[5]), .B(A[4]), .C(A[3]), .D(A[2])); + + LUT4 #(.INIT(P_LUT[47:32])) lut2 (.Z(f2), + .A(A[5]), .B(A[4]), .C(A[3]), .D(A[2])); + LUT4 #(.INIT(P_LUT[63:48])) lut3 (.Z(f3), + .A(A[5]), .B(A[4]), .C(A[3]), .D(A[2])); + + PFUMX mux50(.ALUT(f1), .BLUT(f0), .C0(A[1]), .Z(g0)); + PFUMX mux51(.ALUT(f3), .BLUT(f2), .C0(A[1]), .Z(g1)); + L6MUX21 mux6 (.D0(g0), .D1(g1), .SD(A[0]), .Z(Y)); end else if (WIDTH == 7) begin wire f0, f1, f2, f3, f4, f5, f6, f7, g0, g1, g2, g3, h0, h1; - LUT4 #(.INIT(LUT[15: 0])) lut0 (.Z(f0), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[31:16])) lut1 (.Z(f1), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - LUT4 #(.INIT(LUT[47:32])) lut2 (.Z(f2), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[63:48])) lut3 (.Z(f3), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - LUT4 #(.INIT(LUT[79:64])) lut4 (.Z(f4), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[95:80])) lut5 (.Z(f5), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - LUT4 #(.INIT(LUT[111: 96])) lut6 (.Z(f6), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - LUT4 #(.INIT(LUT[127:112])) lut7 (.Z(f7), - .A(A[0]), .B(A[1]), .C(A[2]), .D(A[3])); - - PFUMX mux50(.ALUT(f1), .BLUT(f0), .C0(A[4]), .Z(g0)); - PFUMX mux51(.ALUT(f3), .BLUT(f2), .C0(A[4]), .Z(g1)); - PFUMX mux52(.ALUT(f5), .BLUT(f4), .C0(A[4]), .Z(g2)); - PFUMX mux53(.ALUT(f7), .BLUT(f6), .C0(A[4]), .Z(g3)); - L6MUX21 mux60 (.D0(g0), .D1(g1), .SD(A[5]), .Z(h0)); - L6MUX21 mux61 (.D0(g2), .D1(g3), .SD(A[5]), .Z(h1)); - L6MUX21 mux7 (.D0(h0), .D1(h1), .SD(A[6]), .Z(Y)); + LUT4 #(.INIT(P_LUT[15: 0])) lut0 (.Z(f0), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + LUT4 #(.INIT(P_LUT[31:16])) lut1 (.Z(f1), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + + LUT4 #(.INIT(P_LUT[47:32])) lut2 (.Z(f2), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + LUT4 #(.INIT(P_LUT[63:48])) lut3 (.Z(f3), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + + LUT4 #(.INIT(P_LUT[79:64])) lut4 (.Z(f4), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + LUT4 #(.INIT(P_LUT[95:80])) lut5 (.Z(f5), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + + LUT4 #(.INIT(P_LUT[111: 96])) lut6 (.Z(f6), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + LUT4 #(.INIT(P_LUT[127:112])) lut7 (.Z(f7), + .A(A[6]), .B(A[5]), .C(A[4]), .D(A[3])); + + PFUMX mux50(.ALUT(f1), .BLUT(f0), .C0(A[2]), .Z(g0)); + PFUMX mux51(.ALUT(f3), .BLUT(f2), .C0(A[2]), .Z(g1)); + PFUMX mux52(.ALUT(f5), .BLUT(f4), .C0(A[2]), .Z(g2)); + PFUMX mux53(.ALUT(f7), .BLUT(f6), .C0(A[2]), .Z(g3)); + L6MUX21 mux60 (.D0(g0), .D1(g1), .SD(A[1]), .Z(h0)); + L6MUX21 mux61 (.D0(g2), .D1(g3), .SD(A[1]), .Z(h1)); + L6MUX21 mux7 (.D0(h0), .D1(h1), .SD(A[0]), .Z(Y)); `endif end else begin wire _TECHMAP_FAIL_ = 1; diff --git a/techlibs/ecp5/cells_sim.v b/techlibs/ecp5/cells_sim.v index 1e4002ee0..430aadbd7 100644 --- a/techlibs/ecp5/cells_sim.v +++ b/techlibs/ecp5/cells_sim.v @@ -9,15 +9,17 @@ module LUT4(input A, B, C, D, output Z); endmodule // --------------------------------------- - +(* abc_box_id=4, lib_whitebox *) module L6MUX21 (input D0, D1, SD, output Z); assign Z = SD ? D1 : D0; endmodule // --------------------------------------- - -module CCU2C(input CIN, A0, B0, C0, D0, A1, B1, C1, D1, - output S0, S1, COUT); +(* abc_box_id=1, abc_carry, lib_whitebox *) +module CCU2C((* abc_carry_in *) input CIN, + input A0, B0, C0, D0, A1, B1, C1, D1, + output S0, S1, + (* abc_carry_out *) output COUT); parameter [15:0] INIT0 = 16'h0000; parameter [15:0] INIT1 = 16'h0000; @@ -26,9 +28,13 @@ module CCU2C(input CIN, A0, B0, C0, D0, A1, B1, C1, D1, // First half wire LUT4_0, LUT2_0; +`ifdef _ABC + assign LUT4_0 = INIT0[{D0, C0, B0, A0}]; + assign LUT2_0 = INIT0[{2'b00, B0, A0}]; +`else LUT4 #(.INIT(INIT0)) lut4_0(.A(A0), .B(B0), .C(C0), .D(D0), .Z(LUT4_0)); LUT2 #(.INIT(INIT0[3:0])) lut2_0(.A(A0), .B(B0), .Z(LUT2_0)); - +`endif wire gated_cin_0 = (INJECT1_0 == "YES") ? 1'b0 : CIN; assign S0 = LUT4_0 ^ gated_cin_0; @@ -37,9 +43,13 @@ module CCU2C(input CIN, A0, B0, C0, D0, A1, B1, C1, D1, // Second half wire LUT4_1, LUT2_1; +`ifdef _ABC + assign LUT4_1 = INIT1[{D1, C1, B1, A1}]; + assign LUT2_1 = INIT1[{2'b00, B1, A1}]; +`else LUT4 #(.INIT(INIT1)) lut4_1(.A(A1), .B(B1), .C(C1), .D(D1), .Z(LUT4_1)); LUT2 #(.INIT(INIT1[3:0])) lut2_1(.A(A1), .B(B1), .Z(LUT2_1)); - +`endif wire gated_cin_1 = (INJECT1_1 == "YES") ? 1'b0 : cout_0; assign S1 = LUT4_1 ^ gated_cin_1; @@ -90,13 +100,13 @@ module TRELLIS_RAM16X2 ( endmodule // --------------------------------------- - +(* abc_box_id=3, lib_whitebox *) module PFUMX (input ALUT, BLUT, C0, output Z); assign Z = C0 ? ALUT : BLUT; endmodule // --------------------------------------- - +(* abc_box_id=2 *) module TRELLIS_DPR16X4 ( input [3:0] DI, input [3:0] WAD, @@ -203,7 +213,7 @@ endmodule // --------------------------------------- -module TRELLIS_FF(input CLK, LSR, CE, DI, M, output reg Q); +module TRELLIS_FF(input CLK, LSR, CE, DI, M, (* abc_flop_q *) output reg Q); parameter GSR = "ENABLED"; parameter [127:0] CEMUX = "1"; parameter CLKMUX = "CLK"; @@ -464,13 +474,13 @@ module DP16KD( input ADA13, ADA12, ADA11, ADA10, ADA9, ADA8, ADA7, ADA6, ADA5, ADA4, ADA3, ADA2, ADA1, ADA0, input CEA, OCEA, CLKA, WEA, RSTA, input CSA2, CSA1, CSA0, - output DOA17, DOA16, DOA15, DOA14, DOA13, DOA12, DOA11, DOA10, DOA9, DOA8, DOA7, DOA6, DOA5, DOA4, DOA3, DOA2, DOA1, DOA0, + (* abc_flop_q *) output DOA17, DOA16, DOA15, DOA14, DOA13, DOA12, DOA11, DOA10, DOA9, DOA8, DOA7, DOA6, DOA5, DOA4, DOA3, DOA2, DOA1, DOA0, input DIB17, DIB16, DIB15, DIB14, DIB13, DIB12, DIB11, DIB10, DIB9, DIB8, DIB7, DIB6, DIB5, DIB4, DIB3, DIB2, DIB1, DIB0, input ADB13, ADB12, ADB11, ADB10, ADB9, ADB8, ADB7, ADB6, ADB5, ADB4, ADB3, ADB2, ADB1, ADB0, input CEB, OCEB, CLKB, WEB, RSTB, input CSB2, CSB1, CSB0, - output DOB17, DOB16, DOB15, DOB14, DOB13, DOB12, DOB11, DOB10, DOB9, DOB8, DOB7, DOB6, DOB5, DOB4, DOB3, DOB2, DOB1, DOB0 + (* abc_flop_q *) output DOB17, DOB16, DOB15, DOB14, DOB13, DOB12, DOB11, DOB10, DOB9, DOB8, DOB7, DOB6, DOB5, DOB4, DOB3, DOB2, DOB1, DOB0 ); parameter DATA_WIDTH_A = 18; parameter DATA_WIDTH_B = 18; diff --git a/techlibs/ecp5/synth_ecp5.cc b/techlibs/ecp5/synth_ecp5.cc index c6e12248e..b271500f1 100644 --- a/techlibs/ecp5/synth_ecp5.cc +++ b/techlibs/ecp5/synth_ecp5.cc @@ -82,6 +82,9 @@ struct SynthEcp5Pass : public ScriptPass log(" -abc2\n"); log(" run two passes of 'abc' for slightly improved logic density\n"); log("\n"); + log(" -abc9\n"); + log(" use new ABC9 flow (EXPERIMENTAL)\n"); + log("\n"); log(" -vpr\n"); log(" generate an output netlist (and BLIF file) suitable for VPR\n"); log(" (this feature is experimental and incomplete)\n"); @@ -93,7 +96,7 @@ struct SynthEcp5Pass : public ScriptPass } string top_opt, blif_file, edif_file, json_file; - bool noccu2, nodffe, nobram, nodram, nomux, flatten, retime, abc2, vpr; + bool noccu2, nodffe, nobram, nodram, nomux, flatten, retime, abc2, abc9, vpr; void clear_flags() YS_OVERRIDE { @@ -110,6 +113,7 @@ struct SynthEcp5Pass : public ScriptPass retime = false; abc2 = false; vpr = false; + abc9 = false; } void execute(std::vector args, RTLIL::Design *design) YS_OVERRIDE @@ -184,6 +188,10 @@ struct SynthEcp5Pass : public ScriptPass vpr = true; continue; } + if (args[argidx] == "-abc9") { + abc9 = true; + continue; + } break; } extra_args(args, argidx, design); @@ -203,7 +211,7 @@ struct SynthEcp5Pass : public ScriptPass { if (check_label("begin")) { - run("read_verilog -lib +/ecp5/cells_sim.v +/ecp5/cells_bb.v"); + run("read_verilog -D_ABC -lib +/ecp5/cells_sim.v +/ecp5/cells_bb.v"); run(stringf("hierarchy -check %s", help_mode ? "-top " : top_opt.c_str())); } @@ -264,10 +272,15 @@ struct SynthEcp5Pass : public ScriptPass run("abc", " (only if -abc2)"); } run("techmap -map +/ecp5/latches_map.v"); - if (nomux) - run("abc -lut 4 -dress"); - else - run("abc -lut 4:7 -dress"); + if (abc9) { + run("abc9 -lut +/ecp5/abc_5g.lut -box +/ecp5/abc_5g.box -W 200"); + } else { + if (nomux) + run("abc -lut 4 -dress"); + else + run("abc -lut 4:7 -dress"); + } + run("clean"); } -- 2.30.2