From b004f0901873962ba4a6fd3e12c7bc0cc1e04032 Mon Sep 17 00:00:00 2001 From: Dan Ravensloft Date: Sat, 25 Apr 2020 17:25:59 +0100 Subject: [PATCH] intel_alm: DSP inference --- techlibs/intel_alm/Makefile.inc | 2 + techlibs/intel_alm/common/dsp_map.v | 49 +++++++++++++++++ techlibs/intel_alm/common/dsp_sim.v | 35 +++++++++++++ techlibs/intel_alm/common/megafunction_bb.v | 28 ++++++++++ techlibs/intel_alm/common/quartus_rename.v | 23 ++++++++ techlibs/intel_alm/synth_intel_alm.cc | 58 +++++++++++++++++---- tests/arch/intel_alm/mul.ys | 23 ++++++++ 7 files changed, 209 insertions(+), 9 deletions(-) create mode 100644 techlibs/intel_alm/common/dsp_map.v create mode 100644 techlibs/intel_alm/common/dsp_sim.v create mode 100644 tests/arch/intel_alm/mul.ys diff --git a/techlibs/intel_alm/Makefile.inc b/techlibs/intel_alm/Makefile.inc index 477be6353..552f00c65 100644 --- a/techlibs/intel_alm/Makefile.inc +++ b/techlibs/intel_alm/Makefile.inc @@ -10,6 +10,8 @@ $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/al $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/arith_alm_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dff_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dff_sim.v)) +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dsp_sim.v)) +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dsp_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/mem_sim.v)) # RAM diff --git a/techlibs/intel_alm/common/dsp_map.v b/techlibs/intel_alm/common/dsp_map.v new file mode 100644 index 000000000..d1bc25e65 --- /dev/null +++ b/techlibs/intel_alm/common/dsp_map.v @@ -0,0 +1,49 @@ +module __MUL27X27(A, B, Y); + +parameter A_SIGNED = 1; +parameter B_SIGNED = 1; +parameter A_WIDTH = 27; +parameter B_WIDTH = 27; +parameter Y_WIDTH = 54; + +input [A_WIDTH-1:0] A; +input [B_WIDTH-1:0] B; +output [Y_WIDTH-1:0] Y; + +MISTRAL_MUL27X27 _TECHMAP_REPLACE_ (.A(A), .B(B), .Y(Y)); + +endmodule + + +module __MUL18X18(A, B, Y); + +parameter A_SIGNED = 1; +parameter B_SIGNED = 1; +parameter A_WIDTH = 18; +parameter B_WIDTH = 18; +parameter Y_WIDTH = 36; + +input [A_WIDTH-1:0] A; +input [B_WIDTH-1:0] B; +output [Y_WIDTH-1:0] Y; + +MISTRAL_MUL18X18 _TECHMAP_REPLACE_ (.A(A), .B(B), .Y(Y)); + +endmodule + + +module __MUL9X9(A, B, Y); + +parameter A_SIGNED = 1; +parameter B_SIGNED = 1; +parameter A_WIDTH = 9; +parameter B_WIDTH = 9; +parameter Y_WIDTH = 18; + +input [A_WIDTH-1:0] A; +input [B_WIDTH-1:0] B; +output [Y_WIDTH-1:0] Y; + +MISTRAL_MUL9X9 _TECHMAP_REPLACE_ (.A(A), .B(B), .Y(Y)); + +endmodule diff --git a/techlibs/intel_alm/common/dsp_sim.v b/techlibs/intel_alm/common/dsp_sim.v new file mode 100644 index 000000000..5dc4c02de --- /dev/null +++ b/techlibs/intel_alm/common/dsp_sim.v @@ -0,0 +1,35 @@ +(* abc9_box *) +module MISTRAL_MUL27x27(input [26:0] A, input [26:0] B, output [53:0] Y); + +specify + (A *> Y) = 4057; + (B *> Y) = 4057; +endspecify + +assign Y = $signed(A) * $signed(B); + +endmodule + +(* abc9_box *) +module MISTRAL_MUL18X18(input [17:0] A, input [17:0] B, output [35:0] Y); + +specify + (A *> Y) = 4057; + (B *> Y) = 4057; +endspecify + +assign Y = $signed(A) * $signed(B); + +endmodule + +(* abc9_box *) +module MISTRAL_MUL9X9(input [8:0] A, input [8:0] B, output [17:0] Y); + +specify + (A *> Y) = 4057; + (B *> Y) = 4057; +endspecify + +assign Y = $signed(A) * $signed(B); + +endmodule diff --git a/techlibs/intel_alm/common/megafunction_bb.v b/techlibs/intel_alm/common/megafunction_bb.v index c749fa70b..b5a3d8892 100644 --- a/techlibs/intel_alm/common/megafunction_bb.v +++ b/techlibs/intel_alm/common/megafunction_bb.v @@ -129,3 +129,31 @@ output [data_width-1:0] portbdataout; input ena0, clk0, clk1; endmodule + +(* blackbox *) +module cyclonev_mac(ax, ay, resulta); + +parameter ax_width = 9; +parameter ay_scan_in_width = 9; +parameter result_a_width = 18; +parameter operation_mode = "M9x9"; + +input [ax_width-1:0] ax; +input [ay_scan_in_width-1:0] ay; +output [result_a_width-1:0] resulta; + +endmodule + +(* blackbox *) +module cyclone10gx_mac(ax, ay, resulta); + +parameter ax_width = 18; +parameter ay_scan_in_width = 18; +parameter result_a_width = 36; +parameter operation_mode = "M18X18_FULL"; + +input [ax_width-1:0] ax; +input [ay_scan_in_width-1:0] ay; +output [result_a_width-1:0] resulta; + +endmodule \ No newline at end of file diff --git a/techlibs/intel_alm/common/quartus_rename.v b/techlibs/intel_alm/common/quartus_rename.v index c40a4e02d..46ef2aa0d 100644 --- a/techlibs/intel_alm/common/quartus_rename.v +++ b/techlibs/intel_alm/common/quartus_rename.v @@ -1,9 +1,11 @@ `ifdef cyclonev `define LCELL cyclonev_lcell_comb +`define MAC cyclonev_mac `define MLAB cyclonev_mlab_cell `endif `ifdef cyclone10gx `define LCELL cyclone10gx_lcell_comb +`define MAC cyclone10gx_mac `define MLAB cyclone10gx_mlab_cell `endif @@ -119,3 +121,24 @@ module MISTRAL_MLAB(input [4:0] A1ADDR, input A1DATA, A1EN, CLK1, input [4:0] B1 ); endmodule + + +module MISTRAL_MUL27X27(input [26:0] A, B, output [53:0] Y); + +`MAC #(.ax_width(27), .ay_scan_in_width(27), .result_a_width(54), .operation_mode("M27x27")) _TECHMAP_REPLACE_ (.ax(A), .ay(B), .resulta(Y)); + +endmodule + + +module MISTRAL_MUL18X18(input [17:0] A, B, output [35:0] Y); + +`MAC #(.ax_width(18), .ay_scan_in_width(18), .result_a_width(36), .operation_mode("M18x18_FULL")) _TECHMAP_REPLACE_ (.ax(B), .ay(A), .resulta(Y)); + +endmodule + + +module MISTRAL_MUL9X9(input [8:0] A, B, output [17:0] Y); + +`MAC #(.ax_width(9), .ay_scan_in_width(9), .result_a_width(18), .operation_mode("M9x9")) _TECHMAP_REPLACE_ (.ax(A), .ay(B), .resulta(Y)); + +endmodule diff --git a/techlibs/intel_alm/synth_intel_alm.cc b/techlibs/intel_alm/synth_intel_alm.cc index 417027462..b751e8413 100644 --- a/techlibs/intel_alm/synth_intel_alm.cc +++ b/techlibs/intel_alm/synth_intel_alm.cc @@ -69,13 +69,16 @@ struct SynthIntelALMPass : public ScriptPass { log(" -nobram\n"); log(" do not use block RAM cells in output netlist\n"); log("\n"); + log(" -nodsp\n"); + log(" do not map multipliers to MISTRAL_MUL cells\n"); + log("\n"); log("The following commands are executed by this synthesis command:\n"); help_script(); log("\n"); } string top_opt, family_opt, bram_type, vout_file; - bool flatten, quartus, nolutram, nobram, dff; + bool flatten, quartus, nolutram, nobram, dff, nodsp; void clear_flags() override { @@ -88,6 +91,7 @@ struct SynthIntelALMPass : public ScriptPass { nolutram = false; nobram = false; dff = false; + nodsp = false; } void execute(std::vector args, RTLIL::Design *design) override @@ -130,6 +134,10 @@ struct SynthIntelALMPass : public ScriptPass { nobram = true; continue; } + if (args[argidx] == "-nodsp") { + nodsp = true; + continue; + } if (args[argidx] == "-noflatten") { flatten = false; continue; @@ -169,9 +177,11 @@ struct SynthIntelALMPass : public ScriptPass { } if (check_label("begin")) { - run(stringf("read_verilog -sv -lib +/intel/%s/cells_sim.v", family_opt.c_str())); + if (family_opt == "cyclonev") + run(stringf("read_verilog -sv -lib +/intel/%s/cells_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/alm_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/dff_sim.v", family_opt.c_str())); + run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/dsp_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/mem_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s -icells +/intel_alm/common/abc9_model.v", family_opt.c_str())); @@ -181,16 +191,46 @@ struct SynthIntelALMPass : public ScriptPass { run(stringf("hierarchy -check %s", help_mode ? "-top " : top_opt.c_str())); } - if (flatten && check_label("flatten", "(unless -noflatten)")) { + if (check_label("coarse")) { run("proc"); - run("flatten"); + if (flatten || help_mode) + run("flatten", "(skip if -noflatten)"); run("tribuf -logic"); run("deminout"); - } - - if (check_label("coarse")) { - run("synth -run coarse -lut 6"); - run("techmap -map +/intel_alm/common/arith_alm_map.v"); + run("opt_expr"); + run("opt_clean"); + run("check"); + run("opt"); + run("wreduce"); + run("peepopt"); + run("opt_clean"); + run("share"); + run("techmap -map +/cmp2lut.v -D LUT_WIDTH=6"); + run("opt_expr"); + run("opt_clean"); + if (help_mode) { + run("techmap -map +/mul2dsp.v [...]", "(unless -nodsp)"); + } else if (!nodsp) { + // Cyclone V supports 9x9 multiplication, Cyclone 10 GX does not. + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=27 -D DSP_B_MAXWIDTH=27 -D DSP_A_MINWIDTH=19 -D DSP_B_MINWIDTH=19 -D DSP_SIGNEDONLY -D DSP_NAME=__MUL27X27"); + run("chtype -set $mul t:$__soft_mul"); + if (family_opt == "cyclonev") { + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=18 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=10 -D DSP_B_MINWIDTH=10 -D DSP_SIGNEDONLY -D DSP_NAME=__MUL18X18"); + run("chtype -set $mul t:$__soft_mul"); + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=9 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_SIGNEDONLY -D DSP_NAME=__MUL9X9"); + run("chtype -set $mul t:$__soft_mul"); + } else if (family_opt == "cyclone10gx") { + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=18 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_SIGNEDONLY -D DSP_NAME=__MUL18X18"); + run("chtype -set $mul t:$__soft_mul"); + } + } + run("alumacc"); + run("techmap -map +/intel_alm/common/arith_alm_map.v -map +/intel_alm/common/dsp_map.v"); + run("opt"); + run("fsm"); + run("opt -fast"); + run("memory -nomap"); + run("opt_clean"); } if (!nobram && check_label("map_bram", "(skip if -nobram)")) { diff --git a/tests/arch/intel_alm/mul.ys b/tests/arch/intel_alm/mul.ys new file mode 100644 index 000000000..92f00156a --- /dev/null +++ b/tests/arch/intel_alm/mul.ys @@ -0,0 +1,23 @@ +read_verilog ../common/mul.v +hierarchy -top top +proc +equiv_opt -assert -map +/intel_alm/common/dsp_sim.v synth_intel_alm -family cyclonev # equivalency check +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd top # Constrain all select calls below inside the top module + +stat + +select -assert-count 1 t:MISTRAL_MUL9X9 +select -assert-none t:MISTRAL_MUL9X9 %% t:* %D + +design -reset +read_verilog ../common/mul.v +hierarchy -top top +proc +equiv_opt -assert -map +/intel_alm/common/dsp_sim.v synth_intel_alm -family cyclone10gx # equivalency check +design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design) +cd top # Constrain all select calls below inside the top module + +# Cyclone 10 GX does not have 9x9 multipliers, so we use 18x18. +select -assert-count 1 t:MISTRAL_MUL18X18 +select -assert-none t:MISTRAL_MUL18X18 %% t:* %D -- 2.30.2