Add mul2dsp multiplier splitting rule and ECP5 mapping
author David Shah <dave@ds0.me>
Mon, 8 Jul 2019 14:40:12 +0000 (15:40 +0100)
committer David Shah <dave@ds0.me>
Mon, 8 Jul 2019 17:42:09 +0000 (18:42 +0100)
Signed-off-by: David Shah <dave@ds0.me>
techlibs/common/Makefile.inc
techlibs/common/mul2dsp.v [new file with mode: 0644]
techlibs/ecp5/Makefile.inc
techlibs/ecp5/dsp_map.v [new file with mode: 0644]
techlibs/ecp5/synth_ecp5.cc

index 0e05620bc6e9250d674f3a3ab462523d8b25f056..e6d1c2f2940bf9fa23e2ddfd44420ab70110748d 100644 (file)
@@ -28,3 +28,4 @@ $(eval $(call add_share_file,share,techlibs/common/dff2ff.v))
 $(eval $(call add_share_file,share,techlibs/common/gate2lut.v))
 $(eval $(call add_share_file,share,techlibs/common/cmp2lut.v))
 $(eval $(call add_share_file,share,techlibs/common/cells.lib))
+$(eval $(call add_share_file,share,techlibs/common/mul2dsp.v))
diff --git a/techlibs/common/mul2dsp.v b/techlibs/common/mul2dsp.v
new file mode 100644 (file)
index 0000000..37ce2e4
--- /dev/null
@@ -0,0 +1,237 @@
+// From Eddie Hung\r
+// extracted from: https://github.com/eddiehung/vtr-with-yosys/blob/vtr7-with-yosys/vtr_flow/misc/yosys_models.v#L220\r
+// revised by Andre DeHon\r
+// further revised by David Shah\r
+`ifndef DSP_A_MAXWIDTH\r
+`define DSP_A_MAXWIDTH 18\r
+`endif\r
+`ifndef DSP_B_MAXWIDTH\r
+`define DSP_B_MAXWIDTH 25\r
+`endif\r
+// The DSP port-width defaults above only apply when no -D override is passed on the techmap command line.\r
+`ifndef ADDER_MINWIDTH\r
+`define ADDER_MINWIDTH AAA\r
+`endif\r
+// NOTE(review): ADDER_MINWIDTH above is never referenced in this file and AAA looks like a placeholder value.\r
+`ifndef DSP_NAME\r
+`define DSP_NAME M18x25\r
+`endif\r
+// Helper macros below substitute their arguments textually (no extra parentheses); unused in this file.\r
+`define MAX(a,b) (a > b ? a : b)\r
+`define MIN(a,b) (a < b ? a : b)\r
+\r
+// Techmap entry point for $mul cells: forwards the multiply to \$__mul_gen, swapping the operands\r
+// (and their signedness flags) when needed so that port A of \$__mul_gen never gets the wider one.\r
+(* techmap_celltype = "$mul" *)\r
+module \$mul (A, B, Y);\r
+       parameter A_SIGNED = 0;\r
+       parameter B_SIGNED = 0;\r
+       parameter A_WIDTH = 1;\r
+       parameter B_WIDTH = 1;\r
+       parameter Y_WIDTH = 1;\r
+\r
+       input [A_WIDTH-1:0] A;\r
+       input [B_WIDTH-1:0] B;\r
+       output [Y_WIDTH-1:0] Y;\r
+\r
+       wire [1023:0] _TECHMAP_DO_ = "proc; clean";\r
+\r
+       generate\r
+               if (A_WIDTH < B_WIDTH) begin\r
+                       // A is already the narrower operand: forward everything unchanged.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(A_SIGNED),\r
+                               .B_SIGNED(B_SIGNED),\r
+                               .A_WIDTH(A_WIDTH),\r
+                               .B_WIDTH(B_WIDTH),\r
+                               .Y_WIDTH(Y_WIDTH)\r
+                       ) mul_slice (\r
+                               .A(A),\r
+                               .B(B),\r
+                               .Y(Y[Y_WIDTH-1:0])\r
+                       );\r
+               end\r
+               else begin\r
+                       // B is no wider than A: exchange the two operands before forwarding.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(B_SIGNED),\r
+                               .B_SIGNED(A_SIGNED),\r
+                               .A_WIDTH(B_WIDTH),\r
+                               .B_WIDTH(A_WIDTH),\r
+                               .Y_WIDTH(Y_WIDTH)\r
+                       ) mul_slice (\r
+                               .A(B),\r
+                               .B(A),\r
+                               .Y(Y[Y_WIDTH-1:0])\r
+                       );\r
+               end\r
+       endgenerate\r
+endmodule\r
+\r
+// Recursive worker for \$mul: while A (then B) is wider than the DSP port, cut it into slices,\r
+// multiply each slice via a nested \$__mul_gen and add the shifted partial products together.\r
+// NOTE(review): every slice inherits A_SIGNED/B_SIGNED; confirm signed operands split correctly.\r
+module \$__mul_gen (A, B, Y);\r
+       parameter A_SIGNED = 0;\r
+       parameter B_SIGNED = 0;\r
+       parameter A_WIDTH = 1;\r
+       parameter B_WIDTH = 1;\r
+       parameter Y_WIDTH = 1;\r
+\r
+       input [A_WIDTH-1:0] A;\r
+       input [B_WIDTH-1:0] B;\r
+       output [Y_WIDTH-1:0] Y;\r
+\r
+       // Yosys techmap directive: run "proc; clean" on this module body before it is used.\r
+       wire [1023:0] _TECHMAP_DO_ = "proc; clean";\r
+\r
+       generate\r
+               if (A_WIDTH > `DSP_A_MAXWIDTH) begin\r
+                       // n = ceil(A_WIDTH / DSP_A_MAXWIDTH) is the number of slices A is cut into.\r
+                       localparam n_floored = A_WIDTH/`DSP_A_MAXWIDTH;\r
+                       localparam n = n_floored + (n_floored*`DSP_A_MAXWIDTH < A_WIDTH ? 1 : 0);\r
+                       wire [`DSP_A_MAXWIDTH+B_WIDTH-1:0] partial [n-1:1];\r
+                       wire [Y_WIDTH-1:0] partial_sum [n-2:0];\r
+\r
+                       // Slice 0 (lowest bits of A) seeds the running sum; its unused top bits are zeroed.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(A_SIGNED),\r
+                               .B_SIGNED(B_SIGNED),\r
+                               .A_WIDTH(`DSP_A_MAXWIDTH),\r
+                               .B_WIDTH(B_WIDTH),\r
+                               .Y_WIDTH(B_WIDTH+`DSP_A_MAXWIDTH)\r
+                       ) mul_slice_first (\r
+                               .A(A[`DSP_A_MAXWIDTH-1:0]),\r
+                               .B(B),\r
+                               .Y(partial_sum[0][B_WIDTH+`DSP_A_MAXWIDTH-1:0])\r
+                       );\r
+                       assign partial_sum[0][Y_WIDTH-1:B_WIDTH+`DSP_A_MAXWIDTH]=0;\r
+\r
+                       // Full-width middle slices 1..n-2 (the loop body is never generated when n == 2).\r
+                       genvar i;\r
+                       for (i = 1; i < n-1; i=i+1) begin:slice\r
+                               \$__mul_gen #(\r
+                                       .A_SIGNED(A_SIGNED),\r
+                                       .B_SIGNED(B_SIGNED),\r
+                                       .A_WIDTH(`DSP_A_MAXWIDTH),\r
+                                       .B_WIDTH(B_WIDTH),\r
+                                       .Y_WIDTH(B_WIDTH+`DSP_A_MAXWIDTH)\r
+                               ) mul_slice (\r
+                                       .A(A[(i+1)*`DSP_A_MAXWIDTH-1:i*`DSP_A_MAXWIDTH]),\r
+                                       .B(B),\r
+                                       .Y(partial[i][B_WIDTH+`DSP_A_MAXWIDTH-1:0])\r
+                               );\r
+                               // Intended as: partial_sum[i] = (partial[i] << i*DSP_A_MAXWIDTH) + partial_sum[i-1]\r
+                               assign partial_sum[i] = {\r
+                                       partial[i][B_WIDTH+`DSP_A_MAXWIDTH-1:0]\r
+                                       + partial_sum[i-1][Y_WIDTH-1:(i*`DSP_A_MAXWIDTH)],\r
+                                       partial_sum[i-1][(i*`DSP_A_MAXWIDTH)-1:0]\r
+                               };\r
+                       end\r
+\r
+                       // Last slice: the remaining A_WIDTH-(n-1)*DSP_A_MAXWIDTH high bits of A.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(A_SIGNED),\r
+                               .B_SIGNED(B_SIGNED),\r
+                               .A_WIDTH(A_WIDTH-(n-1)*`DSP_A_MAXWIDTH),\r
+                               .B_WIDTH(B_WIDTH),\r
+                               .Y_WIDTH(A_WIDTH-(n-1)*`DSP_A_MAXWIDTH+B_WIDTH)\r
+                       ) mul_slice_last (\r
+                               .A(A[A_WIDTH-1:(n-1)*`DSP_A_MAXWIDTH]),\r
+                               .B(B),\r
+                               .Y(partial[n-1][A_WIDTH-(n-1)*`DSP_A_MAXWIDTH+B_WIDTH-1:0])\r
+                       );\r
+                       // Intended as: Y = (partial[n-1] << (n-1)*DSP_A_MAXWIDTH) + partial_sum[n-2]\r
+                       assign Y = {\r
+                               partial[n-1][A_WIDTH-(n-1)*`DSP_A_MAXWIDTH+B_WIDTH-1:0]\r
+                               + partial_sum[n-2][Y_WIDTH-1:((n-1)*`DSP_A_MAXWIDTH)],\r
+                               partial_sum[n-2][((n-1)*`DSP_A_MAXWIDTH)-1:0]\r
+                       };\r
+               end\r
+               else if (B_WIDTH > `DSP_B_MAXWIDTH) begin\r
+                       // Same scheme as the A branch, now slicing B: n = ceil(B_WIDTH / DSP_B_MAXWIDTH).\r
+                       localparam n_floored = B_WIDTH/`DSP_B_MAXWIDTH;\r
+                       localparam n = n_floored + (n_floored*`DSP_B_MAXWIDTH < B_WIDTH ? 1 : 0);\r
+                       wire [A_WIDTH+`DSP_B_MAXWIDTH-1:0] partial [n-1:1];\r
+                       wire [Y_WIDTH-1:0] partial_sum [n-2:0];\r
+\r
+                       // Slice 0 (lowest bits of B) seeds the running sum; its unused top bits are zeroed.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(A_SIGNED),\r
+                               .B_SIGNED(B_SIGNED),\r
+                               .A_WIDTH(A_WIDTH),\r
+                               .B_WIDTH(`DSP_B_MAXWIDTH),\r
+                               .Y_WIDTH(A_WIDTH+`DSP_B_MAXWIDTH)\r
+                       ) mul_first (\r
+                               .A(A),\r
+                               .B(B[`DSP_B_MAXWIDTH-1:0]),\r
+                               .Y(partial_sum[0][A_WIDTH+`DSP_B_MAXWIDTH-1:0])\r
+                       );\r
+                       assign partial_sum[0][Y_WIDTH-1:A_WIDTH+`DSP_B_MAXWIDTH]=0;\r
+\r
+                       // Full-width middle slices 1..n-2 (the loop body is never generated when n == 2).\r
+                       genvar i;\r
+                       for (i = 1; i < n-1; i=i+1) begin:slice\r
+                               \$__mul_gen #(\r
+                                       .A_SIGNED(A_SIGNED),\r
+                                       .B_SIGNED(B_SIGNED),\r
+                                       .A_WIDTH(A_WIDTH),\r
+                                       .B_WIDTH(`DSP_B_MAXWIDTH),\r
+                                       .Y_WIDTH(A_WIDTH+`DSP_B_MAXWIDTH)\r
+                               ) mul (\r
+                                       .A(A),\r
+                                       .B(B[(i+1)*`DSP_B_MAXWIDTH-1:i*`DSP_B_MAXWIDTH]),\r
+                                       .Y(partial[i][A_WIDTH+`DSP_B_MAXWIDTH-1:0])\r
+                               );\r
+                               // Intended as: partial_sum[i] = (partial[i] << i*DSP_B_MAXWIDTH) + partial_sum[i-1]\r
+                               assign partial_sum[i] = {\r
+                                       partial[i][A_WIDTH+`DSP_B_MAXWIDTH-1:0]\r
+                                       + partial_sum[i-1][Y_WIDTH-1:(i*`DSP_B_MAXWIDTH)],\r
+                                       partial_sum[i-1][(i*`DSP_B_MAXWIDTH)-1:0]\r
+                               };\r
+                       end\r
+\r
+                       // Last slice: the remaining B_WIDTH-(n-1)*DSP_B_MAXWIDTH high bits of B.\r
+                       \$__mul_gen #(\r
+                               .A_SIGNED(A_SIGNED),\r
+                               .B_SIGNED(B_SIGNED),\r
+                               .A_WIDTH(A_WIDTH),\r
+                               .B_WIDTH(B_WIDTH-(n-1)*`DSP_B_MAXWIDTH),\r
+                               .Y_WIDTH(A_WIDTH+B_WIDTH-(n-1)*`DSP_B_MAXWIDTH)\r
+                       ) mul_last (\r
+                               .A(A),\r
+                               .B(B[B_WIDTH-1:(n-1)*`DSP_B_MAXWIDTH]),\r
+                               .Y(partial[n-1][A_WIDTH+B_WIDTH-(n-1)*`DSP_B_MAXWIDTH-1:0])\r
+                       );\r
+                       // Intended as: Y = (partial[n-1] << (n-1)*DSP_B_MAXWIDTH) + partial_sum[n-2]\r
+                       assign Y = {\r
+                               partial[n-1][A_WIDTH+B_WIDTH-(n-1)*`DSP_B_MAXWIDTH-1:0]\r
+                               + partial_sum[n-2][Y_WIDTH-1:((n-1)*`DSP_B_MAXWIDTH)],\r
+                               partial_sum[n-2][((n-1)*`DSP_B_MAXWIDTH)-1:0]\r
+                       };\r
+               end\r
+               else begin\r
+                       // Both operands fit one DSP cell: pad each to the port width with its sign bit (or 0).\r
+                       wire [A_WIDTH+B_WIDTH-1:0] out;\r
+                       // dummy soaks up the OUT bits above A_WIDTH+B_WIDTH.\r
+                       wire [(`DSP_A_MAXWIDTH+`DSP_B_MAXWIDTH)-(A_WIDTH+B_WIDTH)-1:0] dummy;\r
+                       wire Asign, Bsign;\r
+                       assign Asign = (A_SIGNED ? A[A_WIDTH-1] : 1'b0);\r
+                       assign Bsign = (B_SIGNED ? B[B_WIDTH-1] : 1'b0);\r
+                       `DSP_NAME _TECHMAP_REPLACE_ (\r
+                               .A({ {{`DSP_A_MAXWIDTH-A_WIDTH}{Asign}}, A }),\r
+                               .B({ {{`DSP_B_MAXWIDTH-B_WIDTH}{Bsign}}, B }),\r
+                               .OUT({dummy, out})\r
+                       );\r
+                       // Truncate to Y_WIDTH, or extend with the product's top bit (0 when both are unsigned).\r
+                       if (Y_WIDTH < A_WIDTH+B_WIDTH)\r
+                               assign Y = out[Y_WIDTH-1:0];\r
+                       else begin\r
+                               wire Ysign = (A_SIGNED || B_SIGNED ? out[A_WIDTH+B_WIDTH-1] : 1'b0);\r
+                               assign Y = { {{Y_WIDTH-(A_WIDTH+B_WIDTH)}{Ysign}}, out[A_WIDTH+B_WIDTH-1:0] };\r
+                       end\r
+               end\r
+       endgenerate\r
+endmodule\r
+\r
+\r
index ff39ba4fe1563fa837980ee7cdf474c4b952ba82..a2f5cadeeaf8b144bf20a66cceee548a371622dd 100644 (file)
@@ -10,6 +10,7 @@ $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/brams_map.v))
 $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/bram.txt))
 $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/arith_map.v))
 $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/latches_map.v))
+$(eval $(call add_share_file,share/ecp5,techlibs/ecp5/dsp_map.v))
 
 $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_5g.box))
 $(eval $(call add_share_file,share/ecp5,techlibs/ecp5/abc_5g.lut))
diff --git a/techlibs/ecp5/dsp_map.v b/techlibs/ecp5/dsp_map.v
new file mode 100644 (file)
index 0000000..22e3057
--- /dev/null
@@ -0,0 +1,10 @@
+module \$__MUL18X18 (input [17:0] A, input [17:0] B, output [35:0] OUT);
+       // Wraps one MULT18X18D cell: buses A/B/OUT are bit-blasted onto its scalar pins; C, SIGNED* and SOURCE* are tied to 0.
+       MULT18X18D mult_i(
+               .SIGNEDA(1'b0), .SIGNEDB(1'b0), .SOURCEA(1'b0), .SOURCEB(1'b0),
+               .C0(1'b0), .C1(1'b0), .C2(1'b0), .C3(1'b0), .C4(1'b0), .C5(1'b0), .C6(1'b0), .C7(1'b0), .C8(1'b0), .C9(1'b0), .C10(1'b0), .C11(1'b0), .C12(1'b0), .C13(1'b0), .C14(1'b0), .C15(1'b0), .C16(1'b0), .C17(1'b0),
+               .A0(A[0]), .A1(A[1]), .A2(A[2]), .A3(A[3]), .A4(A[4]), .A5(A[5]), .A6(A[6]), .A7(A[7]), .A8(A[8]), .A9(A[9]), .A10(A[10]), .A11(A[11]), .A12(A[12]), .A13(A[13]), .A14(A[14]), .A15(A[15]), .A16(A[16]), .A17(A[17]),
+               .B0(B[0]), .B1(B[1]), .B2(B[2]), .B3(B[3]), .B4(B[4]), .B5(B[5]), .B6(B[6]), .B7(B[7]), .B8(B[8]), .B9(B[9]), .B10(B[10]), .B11(B[11]), .B12(B[12]), .B13(B[13]), .B14(B[14]), .B15(B[15]), .B16(B[16]), .B17(B[17]),
+               .P0(OUT[0]), .P1(OUT[1]), .P2(OUT[2]), .P3(OUT[3]), .P4(OUT[4]), .P5(OUT[5]), .P6(OUT[6]), .P7(OUT[7]), .P8(OUT[8]), .P9(OUT[9]), .P10(OUT[10]), .P11(OUT[11]), .P12(OUT[12]), .P13(OUT[13]), .P14(OUT[14]), .P15(OUT[15]), .P16(OUT[16]), .P17(OUT[17]), .P18(OUT[18]), .P19(OUT[19]), .P20(OUT[20]), .P21(OUT[21]), .P22(OUT[22]), .P23(OUT[23]), .P24(OUT[24]), .P25(OUT[25]), .P26(OUT[26]), .P27(OUT[27]), .P28(OUT[28]), .P29(OUT[29]), .P30(OUT[30]), .P31(OUT[31]), .P32(OUT[32]), .P33(OUT[33]), .P34(OUT[34]), .P35(OUT[35])
+       );
+endmodule
\ No newline at end of file
index f16a47f01b975808bb6c0ea7e74e904cfbd3b3d6..3b4185930af67daaa9325de4e4a0c22c67aff8a4 100644 (file)
@@ -89,6 +89,9 @@ struct SynthEcp5Pass : public ScriptPass
                log("        generate an output netlist (and BLIF file) suitable for VPR\n");
                log("        (this feature is experimental and incomplete)\n");
                log("\n");
+               log("    -dsp\n");
+               log("        map multipliers to MULT18X18D (EXPERIMENTAL)\n");
+               log("\n");
                log("\n");
                log("The following commands are executed by this synthesis command:\n");
                help_script();
@@ -96,7 +99,7 @@ struct SynthEcp5Pass : public ScriptPass
        }
 
        string top_opt, blif_file, edif_file, json_file;
-       bool noccu2, nodffe, nobram, nodram, nowidelut, flatten, retime, abc2, abc9, vpr;
+       bool noccu2, nodffe, nobram, nodram, nowidelut, flatten, retime, abc2, abc9, dsp, vpr;
 
        void clear_flags() YS_OVERRIDE
        {
@@ -114,6 +117,7 @@ struct SynthEcp5Pass : public ScriptPass
                abc2 = false;
                vpr = false;
                abc9 = false;
+               dsp = false;
        }
 
        void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -192,6 +196,10 @@ struct SynthEcp5Pass : public ScriptPass
                                abc9 = true;
                                continue;
                        }
+                       if (args[argidx] == "-dsp") {
+                               dsp = true;
+                               continue;
+                       }
                        break;
                }
                extra_args(args, argidx, design);
@@ -225,7 +233,28 @@ struct SynthEcp5Pass : public ScriptPass
 
                if (check_label("coarse"))
                {
-                       run("synth -run coarse");
+                       run("opt_expr");
+                       run("opt_clean");
+                       run("check");
+                       run("opt");
+                       run("wreduce");
+                       run("peepopt");
+                       run("opt_clean");
+                       run("share");
+                       run("techmap -map +/cmp2lut.v -D LUT_WIDTH=4");
+                       run("opt_expr");
+                       run("opt_clean");
+                       if (dsp) {
+                               run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=18 -D DSP_B_MAXWIDTH=18 -D DSP_NAME=$__MUL18X18");
+                               run("clean");
+                               run("techmap -map +/ecp5/dsp_map.v");
+                       }
+                       run("alumacc");
+                       run("opt");
+                       run("fsm");
+                       run("opt -fast");
+                       run("memory -nomap");
+                       run("opt_clean");
                }
 
                if (!nobram && check_label("bram", "(skip if -nobram)"))