From 5b779f7f4ef0bf2c4ad3a412da24fad30b078626 Mon Sep 17 00:00:00 2001 From: Dan Ravensloft Date: Thu, 16 Apr 2020 12:24:04 +0100 Subject: [PATCH] intel_alm: direct LUTRAM cell instantiation By instantiating the LUTRAM cell directly, we avoid a trip through altsyncram, which speeds up Quartus synthesis time. This also gives a little more flexibility, as Yosys can build RAMs out of individual 32x1 LUTRAM cells. While working on this, I discovered that the mem_init0 parameter of _mlab_cell gets ignored by Quartus. --- techlibs/intel_alm/Makefile.inc | 2 +- techlibs/intel_alm/common/bram_m10k_map.v | 2 +- techlibs/intel_alm/common/lutram_mlab.txt | 38 +++++++------ techlibs/intel_alm/common/lutram_mlab_map.v | 29 ---------- techlibs/intel_alm/common/megafunction_bb.v | 23 ++++++++ techlibs/intel_alm/common/mem_sim.v | 60 +++++++++++++++++++++ techlibs/intel_alm/common/quartus_rename.v | 39 ++++++++++++++ techlibs/intel_alm/synth_intel_alm.cc | 2 +- tests/arch/intel_alm/lutram.ys | 20 +++++++ 9 files changed, 163 insertions(+), 52 deletions(-) delete mode 100644 techlibs/intel_alm/common/lutram_mlab_map.v create mode 100644 techlibs/intel_alm/common/mem_sim.v create mode 100644 tests/arch/intel_alm/lutram.ys diff --git a/techlibs/intel_alm/Makefile.inc b/techlibs/intel_alm/Makefile.inc index bbf233aeb..ed6c4510b 100644 --- a/techlibs/intel_alm/Makefile.inc +++ b/techlibs/intel_alm/Makefile.inc @@ -7,13 +7,13 @@ $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/al $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/arith_alm_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dff_map.v)) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/dff_sim.v)) +$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/mem_sim.v)) # RAM bramtypes := m10k m20k $(foreach bramtype, $(bramtypes), $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/bram_$(bramtype).txt))) $(foreach bramtype, $(bramtypes), $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/bram_$(bramtype)_map.v))) $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/lutram_mlab.txt)) -$(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/lutram_mlab_map.v)) # Miscellaneous $(eval $(call add_share_file,share/intel_alm/common,techlibs/intel_alm/common/megafunction_bb.v)) diff --git a/techlibs/intel_alm/common/bram_m10k_map.v b/techlibs/intel_alm/common/bram_m10k_map.v index e5566010d..061463c3e 100644 --- a/techlibs/intel_alm/common/bram_m10k_map.v +++ b/techlibs/intel_alm/common/bram_m10k_map.v @@ -28,4 +28,4 @@ altsyncram #( .clock1(CLK1) ); -endmodule +endmodule diff --git a/techlibs/intel_alm/common/lutram_mlab.txt b/techlibs/intel_alm/common/lutram_mlab.txt index 1d6174d85..3cc69399d 100644 --- a/techlibs/intel_alm/common/lutram_mlab.txt +++ b/techlibs/intel_alm/common/lutram_mlab.txt @@ -1,20 +1,18 @@ -bram __MISTRAL_MLAB - init 0 # TODO: Re-enable when I figure out how LUTRAM init works - abits 5 - dbits 16 @D32x16 - dbits 18 @D32x18 - dbits 20 @D32x20 - groups 2 - ports 1 1 - wrmode 1 0 - # read enable - enable 1 0 - transp 1 0 - clocks 1 2 - clkpol 1 1 -endbram - -match __MISTRAL_MLAB - min efficiency 5 - make_outreg -endmatch +bram MISTRAL_MLAB + init 0 # TODO: Re-enable when Yosys remembers the original filename. + abits 5 + dbits 1 + groups 2 + ports 1 1 + wrmode 1 0 + # write enable + enable 1 0 + transp 0 0 + clocks 1 0 + clkpol 1 1 +endbram + +match MISTRAL_MLAB + min efficiency 5 + make_outreg +endmatch \ No newline at end of file diff --git a/techlibs/intel_alm/common/lutram_mlab_map.v b/techlibs/intel_alm/common/lutram_mlab_map.v deleted file mode 100644 index 3a9c8590e..000000000 --- a/techlibs/intel_alm/common/lutram_mlab_map.v +++ /dev/null @@ -1,29 +0,0 @@ -module __MISTRAL_MLAB(CLK1, CLK2, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA); - -parameter CFG_ABITS = 5; -parameter CFG_DBITS = 20; - -input CLK1, CLK2; -input [CFG_ABITS-1:0] A1ADDR, B1ADDR; -input [CFG_DBITS-1:0] A1DATA; -input A1EN; -output [CFG_DBITS-1:0] B1DATA; - -altsyncram #( - .operation_mode("dual_port"), - .ram_block_type("mlab"), - .widthad_a(CFG_ABITS), - .width_a(CFG_DBITS), - .widthad_b(CFG_ABITS), - .width_b(CFG_DBITS), -) _TECHMAP_REPLACE_ ( - .address_a(A1ADDR), - .data_a(A1DATA), - .wren_a(A1EN), - .address_b(B1ADDR), - .q_b(B1DATA), - .clock0(CLK1), - .clock1(CLK1), -); - -endmodule diff --git a/techlibs/intel_alm/common/megafunction_bb.v b/techlibs/intel_alm/common/megafunction_bb.v index 21ba73a09..c749fa70b 100644 --- a/techlibs/intel_alm/common/megafunction_bb.v +++ b/techlibs/intel_alm/common/megafunction_bb.v @@ -106,3 +106,26 @@ input aclr1; output eccstatus; endmodule + +(* blackbox *) +module cyclonev_mlab_cell(portaaddr, portadatain, portbaddr, portbdataout, ena0, clk0, clk1); + +parameter logical_ram_name = ""; +parameter logical_ram_depth = 32; +parameter logical_ram_width = 20; +parameter mixed_port_feed_through_mode = "new"; +parameter first_bit_number = 0; +parameter first_address = 0; +parameter last_address = 31; +parameter address_width = 5; +parameter data_width = 1; +parameter byte_enable_mask_width = 1; +parameter port_b_data_out_clock = "NONE"; +parameter [639:0] mem_init0 = 640'b0; + +input [address_width-1:0] portaaddr, portbaddr; +input [data_width-1:0] portadatain; +output [data_width-1:0] portbdataout; +input ena0, clk0, clk1; + +endmodule diff --git a/techlibs/intel_alm/common/mem_sim.v b/techlibs/intel_alm/common/mem_sim.v new file mode 100644 index 000000000..ae79b19a4 --- /dev/null +++ b/techlibs/intel_alm/common/mem_sim.v @@ -0,0 +1,60 @@ +// The MLAB +// -------- +// In addition to Logic Array Blocks (LABs) that contain ten Adaptive Logic +// Modules (ALMs, see alm_sim.v), the Cyclone V/10GX also contain +// Memory/Logic Array Blocks (MLABs) that can act as either ten ALMs, or utilise +// the memory the ALM uses to store the look-up table data for general usage, +// producing a 32 address by 20-bit block of memory. MLABs are spread out +// around the chip, so they can be placed near where they are needed, rather than +// being comparatively limited in placement for a deep but narrow memory such as +// the M10K memory block. +// +// MLABs are used mainly for shallow but wide memories, such as CPU register +// files (which have perhaps 32 registers that are comparatively wide (16/32-bit)) +// or shift registers (by using the output of the Nth bit as input for the N+1th +// bit). +// +// Oddly, instead of providing a block 32 address by 20-bit cell, Quartus asks +// synthesis tools to build MLABs out of 32 address by 1-bit cells, and tries +// to put these cells in the same MLAB during cell placement. Because of this +// a MISTRAL_MLAB cell represents one of these 32 address by 1-bit cells, and +// 20 of them represent a physical MLAB. +// +// How the MLAB works +// ------------------ +// MLABs are poorly documented, so the following information is based mainly +// on the simulation model and my knowledge of how memories like these work. +// Additionally, note that the ports of MISTRAL_MLAB are the ones auto-generated +// by the Yosys `memory_bram` pass, and it doesn't make sense to me to use +// `techmap` just for the sake of renaming the cell ports. +// +// The MLAB can be initialised to any value, but unfortunately Quartus only +// allows memory initialisation from a file. Since Yosys doesn't preserve input +// file information, or write the contents of an `initial` block to a file, +// Yosys can't currently initialise the MLAB in a way Quartus will accept. +// +// The MLAB takes in data from A1DATA at the rising edge of CLK1, and if A1EN +// is high, writes it to the address in A1ADDR. A1EN can therefore be used to +// conditionally write data to the MLAB. +// +// Simultaneously, the MLAB reads data from B1ADDR, and outputs it to B1DATA, +// asynchronous to CLK1 and ignoring A1EN. If a synchronous read is needed +// then the output can be fed to embedded flops. Presently, Yosys assumes +// Quartus will pack external flops into the MLAB, but this is an assumption +// that needs testing. + +// The vendor sim model outputs 'x for a very short period (a few +// combinational delta cycles) after each write. This has been omitted from +// the following model because it's very difficult to trigger this in practice +// as clock cycles will be much longer than any potential blip of 'x, so the +// model can be treated as always returning a defined result. +module MISTRAL_MLAB(input [4:0] A1ADDR, input A1DATA, A1EN, CLK1, input [4:0] B1ADDR, output B1DATA); + +reg [31:0] mem = 32'b0; + +always @(posedge CLK1) + if (A1EN) mem[A1ADDR] <= A1DATA; + +assign B1DATA = mem[B1ADDR]; + +endmodule diff --git a/techlibs/intel_alm/common/quartus_rename.v b/techlibs/intel_alm/common/quartus_rename.v index ac0fe12aa..c40a4e02d 100644 --- a/techlibs/intel_alm/common/quartus_rename.v +++ b/techlibs/intel_alm/common/quartus_rename.v @@ -1,8 +1,10 @@ `ifdef cyclonev `define LCELL cyclonev_lcell_comb +`define MLAB cyclonev_mlab_cell `endif `ifdef cyclone10gx `define LCELL cyclone10gx_lcell_comb +`define MLAB cyclone10gx_mlab_cell `endif module __MISTRAL_VCC(output Q); @@ -80,3 +82,40 @@ parameter LUT1 = 16'h0000; `LCELL #(.lut_mask({16'h0, LUT1, 16'h0, LUT0})) _TECHMAP_REPLACE_ (.dataa(A), .datab(B), .datac(C), .datad(D0), .dataf(D1), .cin(CI), .sumout(SO), .cout(CO)); endmodule + + +module MISTRAL_MLAB(input [4:0] A1ADDR, input A1DATA, A1EN, CLK1, input [4:0] B1ADDR, output B1DATA); + +// Here we get to an unfortunate situation. The cell has a mem_init0 parameter, +// which takes in a hexadecimal string that could be used to initialise RAM. +// In the vendor simulation models, this appears to work fine, but Quartus, +// either intentionally or not, forgets about this parameter and initialises the +// RAM to zero. +// +// Because of this, RAM initialisation is presently disabled, but the source +// used to generate mem_init0 is kept (commented out) in case this gets fixed +// or an undocumented way to get Quartus to initialise from mem_init0 is found. + +`MLAB #( + .logical_ram_name("MISTRAL_MLAB"), + .logical_ram_depth(32), + .logical_ram_width(1), + .mixed_port_feed_through_mode("Dont Care"), + .first_bit_number(0), + .first_address(0), + .last_address(31), + .address_width(5), + .data_width(1), + .byte_enable_mask_width(1), + .port_b_data_out_clock("NONE"), + // .mem_init0($sformatf("%08x", INIT)) +) _TECHMAP_REPLACE_ ( + .portaaddr(A1ADDR), + .portadatain(A1DATA), + .portbaddr(B1ADDR), + .portbdataout(B1DATA), + .ena0(A1EN), + .clk0(CLK1) +); + +endmodule diff --git a/techlibs/intel_alm/synth_intel_alm.cc b/techlibs/intel_alm/synth_intel_alm.cc index 200b0cdd1..bf9e746b8 100644 --- a/techlibs/intel_alm/synth_intel_alm.cc +++ b/techlibs/intel_alm/synth_intel_alm.cc @@ -164,6 +164,7 @@ struct SynthIntelALMPass : public ScriptPass { run(stringf("read_verilog -sv -lib +/intel/%s/cells_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/alm_sim.v", family_opt.c_str())); run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/dff_sim.v", family_opt.c_str())); + run(stringf("read_verilog -specify -lib -D %s +/intel_alm/common/mem_sim.v", family_opt.c_str())); // Misc and common cells run("read_verilog -lib +/intel/common/altpll_bb.v"); @@ -190,7 +191,6 @@ struct SynthIntelALMPass : public ScriptPass { if (!nolutram && check_label("map_lutram", "(skip if -nolutram)")) { run("memory_bram -rules +/intel_alm/common/lutram_mlab.txt", "(for Cyclone V / Cyclone 10GX)"); - run("techmap -map +/intel_alm/common/lutram_mlab_map.v", "(for Cyclone V / Cyclone 10GX)"); } if (check_label("map_ffram")) { diff --git a/tests/arch/intel_alm/lutram.ys b/tests/arch/intel_alm/lutram.ys new file mode 100644 index 000000000..6f997b67b --- /dev/null +++ b/tests/arch/intel_alm/lutram.ys @@ -0,0 +1,20 @@ +read_verilog ../common/lutram.v +hierarchy -top lutram_1w1r +proc +memory -nomap +equiv_opt -run :prove -map +/intel_alm/common/alm_sim.v -map +/intel_alm/common/dff_sim.v -map +/intel_alm/common/mem_sim.v synth_intel_alm -family cyclonev -nobram +memory +opt -full + +miter -equiv -flatten -make_assert -make_outputs gold gate miter +sat -verify -prove-asserts -seq 5 -set-init-zero -show-inputs -show-outputs miter + +design -load postopt +cd lutram_1w1r +select -assert-count 16 t:MISTRAL_MLAB +select -assert-count 1 t:MISTRAL_NOT +select -assert-count 2 t:MISTRAL_ALUT2 +select -assert-count 8 t:MISTRAL_ALUT3 +select -assert-count 17 t:MISTRAL_FF +select -assert-none t:MISTRAL_NOT t:MISTRAL_ALUT2 t:MISTRAL_ALUT3 t:MISTRAL_FF t:MISTRAL_MLAB %% t:* %D + -- 2.30.2