From b487e99bcfc9a93761d4cabf819a68af32861b80 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Tue, 13 Dec 2011 17:33:12 +0100 Subject: [PATCH] Initial import --- .gitignore | 2 + build.py | 62 + build/.keep_me | 0 constraints.py | 45 + milkymist/__init__.py | 0 milkymist/lm32/__init__.py | 49 + milkymist/norflash/__init__.py | 31 + milkymist/uart/__init__.py | 28 + tb/norflash/Makefile | 20 + tb/norflash/norflash_conv.py | 10 + tb/norflash/tb_norflash.v | 129 ++ top.py | 22 + verilog/lm32/jtag_cores.v | 86 + verilog/lm32/jtag_tap_spartan6.v | 60 + verilog/lm32/lm32_adder.v | 136 ++ verilog/lm32/lm32_addsub.v | 95 + verilog/lm32/lm32_cpu.v | 2771 +++++++++++++++++++++++ verilog/lm32/lm32_dcache.v | 527 +++++ verilog/lm32/lm32_debug.v | 369 +++ verilog/lm32/lm32_decoder.v | 604 +++++ verilog/lm32/lm32_dp_ram.v | 35 + verilog/lm32/lm32_functions.v | 70 + verilog/lm32/lm32_icache.v | 481 ++++ verilog/lm32/lm32_include.v | 377 +++ verilog/lm32/lm32_instruction_unit.v | 889 ++++++++ verilog/lm32/lm32_interrupt.v | 356 +++ verilog/lm32/lm32_jtag.v | 498 ++++ verilog/lm32/lm32_load_store_unit.v | 829 +++++++ verilog/lm32/lm32_logic_op.v | 97 + verilog/lm32/lm32_mc_arithmetic.v | 309 +++ verilog/lm32/lm32_multiplier.v | 120 + verilog/lm32/lm32_multiplier_spartan6.v | 193 ++ verilog/lm32/lm32_ram.v | 128 ++ verilog/lm32/lm32_shifter.v | 155 ++ verilog/lm32/lm32_top.v | 354 +++ verilog/uart/uart.v | 142 ++ verilog/uart/uart_transceiver.v | 165 ++ 37 files changed, 10244 insertions(+) create mode 100644 .gitignore create mode 100644 build.py create mode 100644 build/.keep_me create mode 100644 constraints.py create mode 100644 milkymist/__init__.py create mode 100644 milkymist/lm32/__init__.py create mode 100644 milkymist/norflash/__init__.py create mode 100644 milkymist/uart/__init__.py create mode 100644 tb/norflash/Makefile create mode 100644 tb/norflash/norflash_conv.py create mode 100644 tb/norflash/tb_norflash.v create mode 100644 top.py create mode 100644 
verilog/lm32/jtag_cores.v create mode 100644 verilog/lm32/jtag_tap_spartan6.v create mode 100644 verilog/lm32/lm32_adder.v create mode 100644 verilog/lm32/lm32_addsub.v create mode 100644 verilog/lm32/lm32_cpu.v create mode 100644 verilog/lm32/lm32_dcache.v create mode 100644 verilog/lm32/lm32_debug.v create mode 100644 verilog/lm32/lm32_decoder.v create mode 100644 verilog/lm32/lm32_dp_ram.v create mode 100644 verilog/lm32/lm32_functions.v create mode 100644 verilog/lm32/lm32_icache.v create mode 100644 verilog/lm32/lm32_include.v create mode 100644 verilog/lm32/lm32_instruction_unit.v create mode 100644 verilog/lm32/lm32_interrupt.v create mode 100644 verilog/lm32/lm32_jtag.v create mode 100644 verilog/lm32/lm32_load_store_unit.v create mode 100644 verilog/lm32/lm32_logic_op.v create mode 100644 verilog/lm32/lm32_mc_arithmetic.v create mode 100644 verilog/lm32/lm32_multiplier.v create mode 100644 verilog/lm32/lm32_multiplier_spartan6.v create mode 100644 verilog/lm32/lm32_ram.v create mode 100644 verilog/lm32/lm32_shifter.v create mode 100644 verilog/lm32/lm32_top.v create mode 100644 verilog/uart/uart.v create mode 100644 verilog/uart/uart_transceiver.v diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..597bc71d --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +build/* diff --git a/build.py b/build.py new file mode 100644 index 00000000..0eb19cd8 --- /dev/null +++ b/build.py @@ -0,0 +1,62 @@ +import os +import top + +# list Verilog sources before changing directory +verilog_sources = [] +def add_core_dir(d): + for root, subFolders, files in os.walk(os.path.join("verilog", d)): + for f in files: + verilog_sources.append(os.path.join(root, f)) +def add_core_files(d, files): + for f in files: + verilog_sources.append(os.path.join("verilog", d, f)) +add_core_files("lm32", ["lm32_cpu.v", "lm32_instruction_unit.v", "lm32_decoder.v", + "lm32_load_store_unit.v", "lm32_adder.v", "lm32_addsub.v", "lm32_logic_op.v", + "lm32_shifter.v", 
"lm32_multiplier_spartan6.v", "lm32_mc_arithmetic.v", + "lm32_interrupt.v", "lm32_ram.v", "lm32_dp_ram.v", "lm32_icache.v", + "lm32_dcache.v", "lm32_top.v", "lm32_debug.v", "lm32_jtag.v", "jtag_cores.v", + "jtag_tap_spartan6.v"]) +add_core_dir("uart") + +os.system("rm -rf build/*") +os.chdir("build") + +def str2file(filename, contents): + f = open(filename, 'w') + f.write(contents) + f.close() + +# generate source +(src_verilog, src_ucf) = top.Get() +str2file("soc.v", src_verilog) +str2file("soc.ucf", src_ucf) +verilog_sources.append("build/soc.v") + +# xst +xst_prj = "" +for s in verilog_sources: + xst_prj += "verilog work ../" + s + "\n" +str2file("soc.prj", xst_prj) +str2file("soc.xst", """run +-ifn soc.prj +-top soc +-ifmt MIXED +-opt_mode SPEED +-opt_level 2 +-resource_sharing no +-reduce_control_sets auto +-ofn soc.ngc +-p xc6slx45-fgg484-2""") +os.system("xst -ifn soc.xst") + +# ngdbuild +os.system("ngdbuild -uc soc.ucf soc.ngc") + +# map +os.system("map -ol high -w soc.ngd") + +# par +os.system("par -ol high -w soc.ncd soc-routed.ncd") + +# bitgen +os.system("bitgen -g LCK_cycle:6 -g Binary:Yes -g INIT_9K:Yes -w soc-routed.ncd soc.bit") diff --git a/build/.keep_me b/build/.keep_me new file mode 100644 index 00000000..e69de29b diff --git a/constraints.py b/constraints.py new file mode 100644 index 00000000..7d949d3e --- /dev/null +++ b/constraints.py @@ -0,0 +1,45 @@ +def Get(ns, norflash0, uart0): + constraints = [] + def add(signal, pin, vec=-1, iostandard="LVCMOS33", extra=""): + constraints.append((ns.GetName(signal), vec, pin, iostandard, extra)) + def add_vec(signal, pins, iostandard="LVCMOS33", extra=""): + i = 0 + for p in pins: + add(signal, p, i, iostandard, extra) + i += 1 + + add_vec(norflash0.adr, ["L22", "L20", "K22", "K21", "J19", "H20", "F22", + "F21", "K17", "J17", "E22", "E20", "H18", "H19", "F20", + "G19", "C22", "C20", "D22", "D21", "F19", "F18", "D20", "D19"], + extra="SLEW = FAST | DRIVE = 8") + add_vec(norflash0.d, ["AA20", "U14", 
"U13", "AA6", "AB6", "W4", "Y4", "Y7", + "AA2", "AB2", "V15", "AA18", "AB18", "Y13", "AA12", "AB12"], + extra = "SLEW = FAST | DRIVE = 8 | PULLDOWN") + add(norflash0.oe_n, "M22", extra="SLEW = FAST | DRIVE = 8") + add(norflash0.we_n, "N20", extra="SLEW = FAST | DRIVE = 8") + add(norflash0.ce_n, "M21", extra="SLEW = FAST | DRIVE = 8") + add(norflash0.rst_n, "P22", extra="SLEW = FAST | DRIVE = 8") + + add(uart0.tx, "L17", extra="SLEW = SLOW") + add(uart0.rx, "K18", extra="PULLUP") + + r = "" + for c in constraints: + r += "NET \"" + c[0] + if c[1] >= 0: + r += "(" + str(c[1]) + ")" + r += "\" LOC = " + c[2] + r += " | IOSTANDARD = " + c[3] + if c[4]: + r += " | " + c[4] + r += ";\n" + + r += """ +NET "sys_clk" LOC = AB11 | IOSTANDARD = LVCMOS33; +NET "sys_clk" TNM_NET = "GRPclk50"; +TIMESPEC "TSclk50" = PERIOD "GRPclk50" 20 ns HIGH 50%; + +NET "sys_rst" LOC = AA4 | IOSTANDARD = LVCMOS33; + """ + + return r diff --git a/milkymist/__init__.py b/milkymist/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/milkymist/lm32/__init__.py b/milkymist/lm32/__init__.py new file mode 100644 index 00000000..3a26c423 --- /dev/null +++ b/milkymist/lm32/__init__.py @@ -0,0 +1,49 @@ +from migen.fhdl import structure as f +from migen.bus import wishbone + +class Inst: + def __init__(self): + self.ibus = i = wishbone.Master("lm32i") + self.dbus = d = wishbone.Master("lm32d") + f.Declare(self, "interrupt", f.BV(32)) + f.Declare(self, "ext_break") + self._inst = f.Instance("lm32_top", + [("I_ADR_O", i.adr_o), + ("I_DAT_O", i.dat_o), + ("I_SEL_O", i.sel_o), + ("I_CYC_O", i.cyc_o), + ("I_STB_O", i.stb_o), + ("I_WE_O", i.we_o), + ("I_CTI_O", i.cti_o), + ("I_LOCK_O", f.BV(1)), + ("I_BTE_O", i.bte_o), + ("D_ADR_O", d.adr_o), + ("D_DAT_O", d.dat_o), + ("D_SEL_O", d.sel_o), + ("D_CYC_O", d.cyc_o), + ("D_STB_O", d.stb_o), + ("D_WE_O", d.we_o), + ("D_CTI_O", d.cti_o), + ("D_LOCK_O", f.BV(1)), + ("D_BTE_O", d.bte_o)], + [("interrupt", self.interrupt), + #("ext_break", 
self.ext_break), + ("I_DAT_I", i.dat_i), + ("I_ACK_I", i.ack_i), + ("I_ERR_I", i.err_i), + ("I_RTY_I", f.BV(1)), + ("D_DAT_I", d.dat_i), + ("D_ACK_I", d.ack_i), + ("D_ERR_I", d.err_i), + ("D_RTY_I", f.BV(1))], + [], + "clk_i", + "rst_i", + "lm32") + + def GetFragment(self): + comb = [ + f.Assign(self._inst.ins["I_RTY_I"], 0), + f.Assign(self._inst.ins["D_RTY_I"], 0) + ] + return f.Fragment(comb=comb, instances=[self._inst]) \ No newline at end of file diff --git a/milkymist/norflash/__init__.py b/milkymist/norflash/__init__.py new file mode 100644 index 00000000..abec8d82 --- /dev/null +++ b/milkymist/norflash/__init__.py @@ -0,0 +1,31 @@ +from migen.fhdl import structure as f +from migen.bus import wishbone +from migen.corelogic import timeline +from functools import partial + +class Inst: + def __init__(self, adr_width, rd_timing): + self.bus = wishbone.Slave("norflash") + d = partial(f.Declare, self) + d("adr", f.BV(adr_width-1)) + d("d", f.BV(16)) + d("oe_n") + d("we_n") + d("ce_n") + d("rst_n") + self.timeline = timeline.Inst(self.bus.cyc_i & self.bus.stb_i, + [(0, [f.Assign(self.adr, f.Cat(0, self.bus.adr_i[2:adr_width]))]), + (rd_timing, [ + f.Assign(self.bus.dat_o[16:], self.d), + f.Assign(self.adr, f.Cat(1, self.bus.adr_i[2:adr_width]))]), + (2*rd_timing, [ + f.Assign(self.bus.dat_o[:16], self.d), + f.Assign(self.bus.ack_o, 1)]), + (2*rd_timing+1, [ + f.Assign(self.bus.ack_o, 0)])]) + + def GetFragment(self): + comb = [f.Assign(self.oe_n, 0), f.Assign(self.we_n, 1), + f.Assign(self.ce_n, 0), f.Assign(self.rst_n, 1)] + return f.Fragment(comb, pads={self.adr, self.d, self.oe_n, self.we_n, self.ce_n, self.rst_n}) \ + + self.timeline.GetFragment() diff --git a/milkymist/uart/__init__.py b/milkymist/uart/__init__.py new file mode 100644 index 00000000..ed85e951 --- /dev/null +++ b/milkymist/uart/__init__.py @@ -0,0 +1,28 @@ +from migen.fhdl import structure as f +from migen.bus import csr + +class Inst: + def __init__(self, csr_addr, clk_freq, baud=115200, 
break_en_default=f.Constant(0)): + self.bus = csr.Slave("uart") + f.Declare(self, "tx") + f.Declare(self, "rx") + f.Declare(self, "irq") + f.Declare(self, "brk") + self._inst = f.Instance("uart", + [("csr_do", self.bus.d_o), + ("uart_tx", self.tx), + ("irq", self.irq), + ("break", self.brk)], + [("csr_a", self.bus.a_i), + ("csr_we", self.bus.we_i), + ("csr_di", self.bus.d_i), + ("uart_rx", self.rx)], + [("csr_addr", f.Constant(csr_addr, f.BV(4))), + ("clk_freq", clk_freq), + ("baud", baud), + ("break_en_default", break_en_default)], + "sys_clk", + "sys_rst") + + def GetFragment(self): + return f.Fragment(instances=[self._inst], pads={self.tx, self.rx}) diff --git a/tb/norflash/Makefile b/tb/norflash/Makefile new file mode 100644 index 00000000..572800ed --- /dev/null +++ b/tb/norflash/Makefile @@ -0,0 +1,20 @@ +SOURCES=tb_norflash.v norflash.v + +all: tb_norflash + +isim: tb_norflash + ./tb_norflash + +cversim: $(SOURCES) + cver $(SOURCES) + +norflash.v: norflash_conv.py + python3 norflash_conv.py > norflash.v + +clean: + rm -f tb_norflash verilog.log norflash.vcd norflash.v + +tb_norflash: $(SOURCES) + iverilog -o tb_norflash $(SOURCES) + +.PHONY: clean sim cversim diff --git a/tb/norflash/norflash_conv.py b/tb/norflash/norflash_conv.py new file mode 100644 index 00000000..e093d572 --- /dev/null +++ b/tb/norflash/norflash_conv.py @@ -0,0 +1,10 @@ +from migen.fhdl import verilog +from migen.fhdl import structure as f +from migen.bus import wishbone +from milkymist import norflash + +norflash0 = norflash.Inst(25, 12) +frag = norflash0.GetFragment() +v = verilog.Convert(frag, name="norflash", + ios={norflash0.bus.cyc_i, norflash0.bus.stb_i, norflash0.bus.we_i, norflash0.bus.adr_i, norflash0.bus.sel_i, norflash0.bus.dat_i, norflash0.bus.dat_o, norflash0.bus.ack_o}) +print(v) diff --git a/tb/norflash/tb_norflash.v b/tb/norflash/tb_norflash.v new file mode 100644 index 00000000..02599fa1 --- /dev/null +++ b/tb/norflash/tb_norflash.v @@ -0,0 +1,129 @@ +/* + * Milkymist 
SoC + * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +`timescale 1ns / 1ps + +module tb_norflash(); + +reg sys_clk; +reg sys_rst; + +reg [31:0] wb_adr_i; +wire [31:0] wb_dat_o; +reg wb_cyc_i; +reg wb_stb_i; +wire wb_ack_o; +reg [3:0] wb_sel_i; + +wire [23:0] flash_adr; +wire [15:0] flash_d; +reg [15:0] flash_do; + +always @(flash_adr) #110 flash_do <= flash_adr[15:0] + 16'b1; + +norflash dut( + .sys_clk(sys_clk), + .sys_rst(sys_rst), + + .wishbone_norflash_adr_i(wb_adr_i), + .wishbone_norflash_dat_o(wb_dat_o), + .wishbone_norflash_cyc_i(wb_cyc_i), + .wishbone_norflash_stb_i(wb_stb_i), + .wishbone_norflash_ack_o(wb_ack_o), + .wishbone_norflash_sel_i(wb_sel_i), + + .norflash_adr(flash_adr), + .norflash_d(flash_d), + .norflash_oe_n(flash_oe_n), + .norflash_we_n(flash_we_n) +); + +//assign flash_d = flash_oe_n ? 
16'bz : flash_do; +assign flash_d = flash_do; + +task wbread; + input [31:0] address; + integer i; + begin + wb_adr_i <= address; + wb_cyc_i <= 1'b1; + wb_stb_i <= 1'b1; + + i = 1; + while(~wb_ack_o) begin + #5 sys_clk <= 1'b1; + #5 sys_clk <= 1'b0; + i = i + 1; + end + + $display("Read address %h completed in %d cycles, result %h", address, i, wb_dat_o); + + wb_cyc_i <= 1'b0; + wb_stb_i <= 1'b0; + + /* Let the core release its ack */ + #5 sys_clk <= 1'b1; + #5 sys_clk <= 1'b0; + end +endtask + +initial begin + $dumpfile("norflash.vcd"); + $dumpvars(1, dut); + + sys_rst <= 1'b1; + sys_clk <= 1'b0; + + wb_adr_i <= 32'h00000000; + wb_cyc_i <= 1'b0; + wb_stb_i <= 1'b0; + wb_sel_i <= 4'b1111; + + #5 sys_clk <= 1'b1; + #5 sys_clk <= 1'b0; + + sys_rst <= 1'b0; + #5 sys_clk <= 1'b1; + #5 sys_clk <= 1'b0; + + wbread(32'h00000000); + wbread(32'h00000004); + + wb_sel_i = 4'b0010; + wbread(32'h0000fff1); + + wb_sel_i = 4'b0100; + wbread(32'h0000fff2); + + wb_sel_i = 4'b1000; + wbread(32'h0000fff3); + + wb_sel_i = 4'b0100; + wbread(32'h0000fff0); + + wb_sel_i = 4'b1111; + wbread(32'h00000010); + #5 sys_clk = 1'b1; + #5 sys_clk = 1'b0; + #5 sys_clk = 1'b1; + #5 sys_clk = 1'b0; + wbread(32'h00000040); + + $finish; +end + +endmodule diff --git a/top.py b/top.py new file mode 100644 index 00000000..69439a4a --- /dev/null +++ b/top.py @@ -0,0 +1,22 @@ +from migen.fhdl import convtools, verilog, autofragment +from migen.bus import wishbone, csr, wishbone2csr +from milkymist import lm32, norflash, uart +import constraints + +def Get(): + cpu0 = lm32.Inst() + norflash0 = norflash.Inst(25, 12) + wishbone2csr0 = wishbone2csr.Inst() + wishbonecon0 = wishbone.InterconnectShared( + [cpu0.ibus, cpu0.dbus], + [(0, norflash0.bus), (3, wishbone2csr0.wishbone)], + register=True, + offset=1) + uart0 = uart.Inst(0, 50*1000*1000, baud=115200) + csrcon0 = csr.Interconnect(wishbone2csr0.csr, [uart0.bus]) + + frag = autofragment.FromLocal() + vns = convtools.Namespace() + src_verilog = 
verilog.Convert(frag, name="soc", ns=vns) + src_ucf = constraints.Get(vns, norflash0, uart0) + return (src_verilog, src_ucf) diff --git a/verilog/lm32/jtag_cores.v b/verilog/lm32/jtag_cores.v new file mode 100644 index 00000000..d1a76c80 --- /dev/null +++ b/verilog/lm32/jtag_cores.v @@ -0,0 +1,86 @@ +/* + * Milkymist SoC + * Copyright (c) 2010 Michael Walle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +module jtag_cores ( + input [7:0] reg_d, + input [2:0] reg_addr_d, + output reg_update, + output [7:0] reg_q, + output [2:0] reg_addr_q, + output jtck, + output jrstn +); + +wire tck; +wire tdi; +wire tdo; +wire shift; +wire update; +wire reset; + +jtag_tap jtag_tap ( + .tck(tck), + .tdi(tdi), + .tdo(tdo), + .shift(shift), + .update(update), + .reset(reset) +); + +reg [10:0] jtag_shift; +reg [10:0] jtag_latched; + +always @(posedge tck or posedge reset) +begin + if(reset) + jtag_shift <= 11'b0; + else begin + if(shift) + jtag_shift <= {tdi, jtag_shift[10:1]}; + else + jtag_shift <= {reg_d, reg_addr_d}; + end +end + +assign tdo = jtag_shift[0]; + +always @(posedge reg_update or posedge reset) +begin + if(reset) + jtag_latched <= 11'b0; + else + jtag_latched <= jtag_shift; +end + +assign reg_update = update; +assign reg_q = jtag_latched[10:3]; +assign reg_addr_q = jtag_latched[2:0]; +assign jtck = tck; +assign jrstn = ~reset; + +endmodule diff --git a/verilog/lm32/jtag_tap_spartan6.v b/verilog/lm32/jtag_tap_spartan6.v new file mode 100644 index 00000000..71b6879a --- /dev/null +++ b/verilog/lm32/jtag_tap_spartan6.v @@ -0,0 +1,60 @@ +/* + * Milkymist SoC + * Copyright (c) 2010 Michael Walle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +module jtag_tap( + output tck, + output tdi, + input tdo, + output shift, + output update, + output reset +); + +wire g_shift; +wire g_update; + +assign shift = g_shift & sel; +assign update = g_update & sel; + +BSCAN_SPARTAN6 #( + .JTAG_CHAIN(1) +) bscan ( + .CAPTURE(), + .DRCK(tck), + .RESET(reset), + .RUNTEST(), + .SEL(sel), + .SHIFT(g_shift), + .TCK(), + .TDI(tdi), + .TMS(), + .UPDATE(g_update), + .TDO(tdo) +); + +endmodule diff --git a/verilog/lm32/lm32_adder.v b/verilog/lm32/lm32_adder.v new file mode 100644 index 00000000..d4fa41df --- /dev/null +++ b/verilog/lm32/lm32_adder.v @@ -0,0 +1,136 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. 
+// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_adder.v +// Title : Integer adder / subtractor with comparison flag generation +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_adder ( + // ----- Inputs ------- + adder_op_x, + adder_op_x_n, + operand_0_x, + operand_1_x, + // ----- Outputs ------- + adder_result_x, + adder_carry_n_x, + adder_overflow_x + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input adder_op_x; // Operation to perform, 0 for addition, 1 for subtraction +input adder_op_x_n; // Inverted version of adder_op_x +input [`LM32_WORD_RNG] operand_0_x; // Operand to add, or subtract from +input [`LM32_WORD_RNG] operand_1_x; // Operand to add, or subtract by + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [`LM32_WORD_RNG] adder_result_x; // Result 
of addition or subtraction +wire [`LM32_WORD_RNG] adder_result_x; +output adder_carry_n_x; // Inverted carry +wire adder_carry_n_x; +output adder_overflow_x; // Indicates if overflow occurred, only valid for subtractions +reg adder_overflow_x; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +wire a_sign; // Sign (i.e. positive or negative) of operand 0 +wire b_sign; // Sign of operand 1 +wire result_sign; // Sign of result + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + +lm32_addsub addsub ( + // ----- Inputs ----- + .DataA (operand_0_x), + .DataB (operand_1_x), + .Cin (adder_op_x), + .Add_Sub (adder_op_x_n), + // ----- Outputs ----- + .Result (adder_result_x), + .Cout (adder_carry_n_x) + ); + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +// Extract signs of operands and result + +assign a_sign = operand_0_x[`LM32_WORD_WIDTH-1]; +assign b_sign = operand_1_x[`LM32_WORD_WIDTH-1]; +assign result_sign = adder_result_x[`LM32_WORD_WIDTH-1]; + +// Determine whether an overflow occurred when performing a subtraction + +always @(*) +begin + // +ve - -ve = -ve -> overflow + // -ve - +ve = +ve -> overflow + if ( (!a_sign & b_sign & result_sign) + || (a_sign & !b_sign & !result_sign) + ) + adder_overflow_x = `TRUE; + else + adder_overflow_x = `FALSE; +end + +endmodule + diff --git a/verilog/lm32/lm32_addsub.v b/verilog/lm32/lm32_addsub.v new file mode 100644 index 00000000..2a37ad2d --- /dev/null +++ b/verilog/lm32/lm32_addsub.v @@ -0,0 +1,95 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor 
Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_addsub.v +// Title : PMI adder/subtractor. 
+// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_addsub ( + // ----- Inputs ------- + DataA, + DataB, + Cin, + Add_Sub, + // ----- Outputs ------- + Result, + Cout + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input [31:0] DataA; +input [31:0] DataB; +input Cin; +input Add_Sub; + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [31:0] Result; +wire [31:0] Result; +output Cout; +wire Cout; + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + +// Modified for Milkymist: removed non-portable instantiated block + wire [32:0] tmp_addResult = DataA + DataB + Cin; + wire [32:0] tmp_subResult = DataA - DataB - !Cin; + + assign Result = (Add_Sub == 1) ? tmp_addResult[31:0] : tmp_subResult[31:0]; + assign Cout = (Add_Sub == 1) ? tmp_addResult[32] : !tmp_subResult[32]; + +endmodule diff --git a/verilog/lm32/lm32_cpu.v b/verilog/lm32/lm32_cpu.v new file mode 100644 index 00000000..dc5be84c --- /dev/null +++ b/verilog/lm32/lm32_cpu.v @@ -0,0 +1,2771 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. 
+// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_cpu.v +// Title : Top-level of CPU. +// Dependencies : lm32_include.v +// +// Version 3.8 +// 1. Feature: Support for dynamically switching EBA to DEBA via a GPIO. +// 2. Bug: EA now reports instruction that caused the data abort, rather than +// next instruction. +// +// Version 3.4 +// 1. Bug Fix: In a tight infinite loop (add, sw, bi) incoming interrupts were +// never serviced. +// +// Version 3.3 +// 1. Feature: Support for memory that is tightly coupled to processor core, and +// has a single-cycle access latency (same as caches). Instruction port has +// access to a dedicated physically-mapped memory. Data port has access to +// a dedicated physically-mapped memory. In order to be able to manipulate +// values in both these memories via the debugger, these memories also +// interface with the data port of LM32. +// 2. Feature: Extended Configuration Register +// 3. Bug Fix: Removed port names that conflict with keywords reserved in System- +// Verilog. +// +// Version 3.2 +// 1. 
Bug Fix: Single-stepping a load/store to invalid address causes debugger to +// hang. At the same time CPU fails to register data bus error exception. Bug +// is caused because (a) data bus error exception occurs after load/store has +// passed X stage and next sequential instruction (e.g., brk) is already in X +// stage, and (b) data bus error exception had lower priority than, say, brk +// exception. +// 2. Bug Fix: If a brk (or scall/eret/bret) sequentially follows a load/store to +// invalid location, CPU will fail to register data bus error exception. The +// solution is to stall scall/eret/bret/brk instructions in D pipeline stage +// until load/store has completed. +// 3. Feature: Enable precise identification of load/store that causes seg fault. +// 4. SYNC resets used for register file when implemented in EBRs. +// +// Version 3.1 +// 1. Feature: LM32 Register File can now be mapped in to on-chip block RAM (EBR) +// instead of distributed memory by enabling the option in LM32 GUI. +// 2. Feature: LM32 also adds a static branch predictor to improve branch +// performance. All immediate-based forward-pointing branches are predicted +// not-taken. All immediate-based backward-pointing branches are predicted taken. 
+// +// Version 7.0SP2, 3.0 +// No Change +// +// Version 6.1.17 +// Initial Release +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_cpu ( + // ----- Inputs ------- + clk_i, +`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE + clk_n_i, +`endif + rst_i, +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + at_debug, + `endif +`endif + // From external devices +`ifdef CFG_INTERRUPTS_ENABLED + interrupt, +`endif + // From user logic +`ifdef CFG_USER_ENABLED + user_result, + user_complete, +`endif +`ifdef CFG_JTAG_ENABLED + // From JTAG + jtag_clk, + jtag_update, + jtag_reg_q, + jtag_reg_addr_q, +`endif +`ifdef CFG_EXTERNAL_BREAK_ENABLED + ext_break, +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + I_DAT_I, + I_ACK_I, + I_ERR_I, + I_RTY_I, +`endif + // Data Wishbone master + D_DAT_I, + D_ACK_I, + D_ERR_I, + D_RTY_I, + // ----- Outputs ------- +`ifdef CFG_TRACE_ENABLED + trace_pc, + trace_pc_valid, + trace_exception, + trace_eid, + trace_eret, +`ifdef CFG_DEBUG_ENABLED + trace_bret, +`endif +`endif +`ifdef CFG_JTAG_ENABLED + jtag_reg_d, + jtag_reg_addr_d, +`endif +`ifdef CFG_USER_ENABLED + user_valid, + user_opcode, + user_operand_0, + user_operand_1, +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + I_DAT_O, + I_ADR_O, + I_CYC_O, + I_SEL_O, + I_STB_O, + I_WE_O, + I_CTI_O, + I_LOCK_O, + I_BTE_O, +`endif + // Data Wishbone master + D_DAT_O, + D_ADR_O, + D_CYC_O, + D_SEL_O, + D_STB_O, + D_WE_O, + D_CTI_O, + D_LOCK_O, + D_BTE_O + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter eba_reset = `CFG_EBA_RESET; // Reset value for EBA CSR +`ifdef CFG_DEBUG_ENABLED +parameter deba_reset = `CFG_DEBA_RESET; // Reset value for DEBA CSR +`endif + +`ifdef 
CFG_ICACHE_ENABLED +parameter icache_associativity = `CFG_ICACHE_ASSOCIATIVITY; // Associativity of the cache (Number of ways) +parameter icache_sets = `CFG_ICACHE_SETS; // Number of sets +parameter icache_bytes_per_line = `CFG_ICACHE_BYTES_PER_LINE; // Number of bytes per cache line +parameter icache_base_address = `CFG_ICACHE_BASE_ADDRESS; // Base address of cachable memory +parameter icache_limit = `CFG_ICACHE_LIMIT; // Limit (highest address) of cachable memory +`else +parameter icache_associativity = 1; +parameter icache_sets = 512; +parameter icache_bytes_per_line = 16; +parameter icache_base_address = 0; +parameter icache_limit = 0; +`endif + +`ifdef CFG_DCACHE_ENABLED +parameter dcache_associativity = `CFG_DCACHE_ASSOCIATIVITY; // Associativity of the cache (Number of ways) +parameter dcache_sets = `CFG_DCACHE_SETS; // Number of sets +parameter dcache_bytes_per_line = `CFG_DCACHE_BYTES_PER_LINE; // Number of bytes per cache line +parameter dcache_base_address = `CFG_DCACHE_BASE_ADDRESS; // Base address of cachable memory +parameter dcache_limit = `CFG_DCACHE_LIMIT; // Limit (highest address) of cachable memory +`else +parameter dcache_associativity = 1; +parameter dcache_sets = 512; +parameter dcache_bytes_per_line = 16; +parameter dcache_base_address = 0; +parameter dcache_limit = 0; +`endif + +`ifdef CFG_DEBUG_ENABLED +parameter watchpoints = `CFG_WATCHPOINTS; // Number of h/w watchpoint CSRs +`else +parameter watchpoints = 0; +`endif +`ifdef CFG_ROM_DEBUG_ENABLED +parameter breakpoints = `CFG_BREAKPOINTS; // Number of h/w breakpoint CSRs +`else +parameter breakpoints = 0; +`endif + +`ifdef CFG_INTERRUPTS_ENABLED +parameter interrupts = `CFG_INTERRUPTS; // Number of interrupts +`else +parameter interrupts = 0; +`endif + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE +input clk_n_i; // Inverted clock +`endif +input rst_i; 
// Reset + +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + input at_debug; // GPIO input that maps EBA to DEBA + `endif +`endif + +`ifdef CFG_INTERRUPTS_ENABLED +input [`LM32_INTERRUPT_RNG] interrupt; // Interrupt pins +`endif + +`ifdef CFG_USER_ENABLED +input [`LM32_WORD_RNG] user_result; // User-defined instruction result +input user_complete; // User-defined instruction execution is complete +`endif + +`ifdef CFG_JTAG_ENABLED +input jtag_clk; // JTAG clock +input jtag_update; // JTAG state machine is in data register update state +input [`LM32_BYTE_RNG] jtag_reg_q; +input [2:0] jtag_reg_addr_q; +`endif + +`ifdef CFG_IWB_ENABLED +input [`LM32_WORD_RNG] I_DAT_I; // Instruction Wishbone interface read data +input I_ACK_I; // Instruction Wishbone interface acknowledgement +input I_ERR_I; // Instruction Wishbone interface error +input I_RTY_I; // Instruction Wishbone interface retry +`endif + +input [`LM32_WORD_RNG] D_DAT_I; // Data Wishbone interface read data +input D_ACK_I; // Data Wishbone interface acknowledgement +input D_ERR_I; // Data Wishbone interface error +input D_RTY_I; // Data Wishbone interface retry + +`ifdef CFG_EXTERNAL_BREAK_ENABLED +input ext_break; +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +`ifdef CFG_TRACE_ENABLED +output [`LM32_PC_RNG] trace_pc; // PC to trace +reg [`LM32_PC_RNG] trace_pc; +output trace_pc_valid; // Indicates that a new trace PC is valid +reg trace_pc_valid; +output trace_exception; // Indicates an exception has occured +reg trace_exception; +output [`LM32_EID_RNG] trace_eid; // Indicates what type of exception has occured +reg [`LM32_EID_RNG] trace_eid; +output trace_eret; // Indicates an eret instruction has been executed +reg trace_eret; +`ifdef CFG_DEBUG_ENABLED +output trace_bret; // Indicates a bret instruction has been executed +reg trace_bret; +`endif +`endif + +`ifdef CFG_JTAG_ENABLED +output [`LM32_BYTE_RNG] jtag_reg_d; 
+wire [`LM32_BYTE_RNG] jtag_reg_d; +output [2:0] jtag_reg_addr_d; +wire [2:0] jtag_reg_addr_d; +`endif + +`ifdef CFG_USER_ENABLED +output user_valid; // Indicates if user_opcode is valid +wire user_valid; +output [`LM32_USER_OPCODE_RNG] user_opcode; // User-defined instruction opcode +reg [`LM32_USER_OPCODE_RNG] user_opcode; +output [`LM32_WORD_RNG] user_operand_0; // First operand for user-defined instruction +wire [`LM32_WORD_RNG] user_operand_0; +output [`LM32_WORD_RNG] user_operand_1; // Second operand for user-defined instruction +wire [`LM32_WORD_RNG] user_operand_1; +`endif + +`ifdef CFG_IWB_ENABLED +output [`LM32_WORD_RNG] I_DAT_O; // Instruction Wishbone interface write data +wire [`LM32_WORD_RNG] I_DAT_O; +output [`LM32_WORD_RNG] I_ADR_O; // Instruction Wishbone interface address +wire [`LM32_WORD_RNG] I_ADR_O; +output I_CYC_O; // Instruction Wishbone interface cycle +wire I_CYC_O; +output [`LM32_BYTE_SELECT_RNG] I_SEL_O; // Instruction Wishbone interface byte select +wire [`LM32_BYTE_SELECT_RNG] I_SEL_O; +output I_STB_O; // Instruction Wishbone interface strobe +wire I_STB_O; +output I_WE_O; // Instruction Wishbone interface write enable +wire I_WE_O; +output [`LM32_CTYPE_RNG] I_CTI_O; // Instruction Wishbone interface cycle type +wire [`LM32_CTYPE_RNG] I_CTI_O; +output I_LOCK_O; // Instruction Wishbone interface lock bus +wire I_LOCK_O; +output [`LM32_BTYPE_RNG] I_BTE_O; // Instruction Wishbone interface burst type +wire [`LM32_BTYPE_RNG] I_BTE_O; +`endif + +output [`LM32_WORD_RNG] D_DAT_O; // Data Wishbone interface write data +wire [`LM32_WORD_RNG] D_DAT_O; +output [`LM32_WORD_RNG] D_ADR_O; // Data Wishbone interface address +wire [`LM32_WORD_RNG] D_ADR_O; +output D_CYC_O; // Data Wishbone interface cycle +wire D_CYC_O; +output [`LM32_BYTE_SELECT_RNG] D_SEL_O; // Data Wishbone interface byte select +wire [`LM32_BYTE_SELECT_RNG] D_SEL_O; +output D_STB_O; // Data Wishbone interface strobe +wire D_STB_O; +output D_WE_O; // Data Wishbone interface write 
enable +wire D_WE_O; +output [`LM32_CTYPE_RNG] D_CTI_O; // Data Wishbone interface cycle type +wire [`LM32_CTYPE_RNG] D_CTI_O; +output D_LOCK_O; // Data Wishbone interface lock bus +wire D_LOCK_O; +output [`LM32_BTYPE_RNG] D_BTE_O; // Data Wishbone interface burst type +wire [`LM32_BTYPE_RNG] D_BTE_O; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +// Pipeline registers + +`ifdef LM32_CACHE_ENABLED +reg valid_a; // Instruction in A stage is valid +`endif +reg valid_f; // Instruction in F stage is valid +reg valid_d; // Instruction in D stage is valid +reg valid_x; // Instruction in X stage is valid +reg valid_m; // Instruction in M stage is valid +reg valid_w; // Instruction in W stage is valid + +wire q_x; +wire [`LM32_WORD_RNG] immediate_d; // Immediate operand +wire load_d; // Indicates a load instruction +reg load_x; +reg load_m; +wire load_q_x; +wire store_q_x; +wire store_d; // Indicates a store instruction +reg store_x; +reg store_m; +wire [`LM32_SIZE_RNG] size_d; // Size of load/store (byte, hword, word) +reg [`LM32_SIZE_RNG] size_x; +wire branch_d; // Indicates a branch instruction +wire branch_predict_d; // Indicates a branch is predicted +wire branch_predict_taken_d; // Indicates a branch is predicted taken +wire [`LM32_PC_RNG] branch_predict_address_d; // Address to which predicted branch jumps +wire [`LM32_PC_RNG] branch_target_d; +wire bi_unconditional; +wire bi_conditional; +reg branch_x; +reg branch_predict_x; +reg branch_predict_taken_x; +reg branch_m; +reg branch_predict_m; +reg branch_predict_taken_m; +wire branch_mispredict_taken_m; // Indicates a branch was mispredicted as taken +wire branch_flushX_m; // Indicates that instruction in X stage must be squashed +wire branch_reg_d; // Branch to register or immediate +wire [`LM32_PC_RNG] branch_offset_d; // Branch offset for immediate branches +reg [`LM32_PC_RNG] branch_target_x; // Address to branch 
to +reg [`LM32_PC_RNG] branch_target_m; +wire [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0_d; // Which result should be selected in D stage for operand 0 +wire [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1_d; // Which result should be selected in D stage for operand 1 + +wire x_result_sel_csr_d; // Select X stage result from CSRs +reg x_result_sel_csr_x; +`ifdef LM32_MC_ARITHMETIC_ENABLED +wire x_result_sel_mc_arith_d; // Select X stage result from multi-cycle arithmetic unit +reg x_result_sel_mc_arith_x; +`endif +`ifdef LM32_NO_BARREL_SHIFT +wire x_result_sel_shift_d; // Select X stage result from shifter +reg x_result_sel_shift_x; +`endif +`ifdef CFG_SIGN_EXTEND_ENABLED +wire x_result_sel_sext_d; // Select X stage result from sign-extend logic +reg x_result_sel_sext_x; +`endif +wire x_result_sel_logic_d; // Select X stage result from logic op unit +reg x_result_sel_logic_x; +`ifdef CFG_USER_ENABLED +wire x_result_sel_user_d; // Select X stage result from user-defined logic +reg x_result_sel_user_x; +`endif +wire x_result_sel_add_d; // Select X stage result from adder +reg x_result_sel_add_x; +wire m_result_sel_compare_d; // Select M stage result from comparison logic +reg m_result_sel_compare_x; +reg m_result_sel_compare_m; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED +wire m_result_sel_shift_d; // Select M stage result from shifter +reg m_result_sel_shift_x; +reg m_result_sel_shift_m; +`endif +wire w_result_sel_load_d; // Select W stage result from load/store unit +reg w_result_sel_load_x; +reg w_result_sel_load_m; +reg w_result_sel_load_w; +`ifdef CFG_PL_MULTIPLY_ENABLED +wire w_result_sel_mul_d; // Select W stage result from multiplier +reg w_result_sel_mul_x; +reg w_result_sel_mul_m; +reg w_result_sel_mul_w; +`endif +wire x_bypass_enable_d; // Whether result is bypassable in X stage +reg x_bypass_enable_x; +wire m_bypass_enable_d; // Whether result is bypassable in M stage +reg m_bypass_enable_x; +reg m_bypass_enable_m; +wire sign_extend_d; // Whether to sign-extend or 
zero-extend +reg sign_extend_x; +wire write_enable_d; // Register file write enable +reg write_enable_x; +wire write_enable_q_x; +reg write_enable_m; +wire write_enable_q_m; +reg write_enable_w; +wire write_enable_q_w; +wire read_enable_0_d; // Register file read enable 0 +wire [`LM32_REG_IDX_RNG] read_idx_0_d; // Register file read index 0 +wire read_enable_1_d; // Register file read enable 1 +wire [`LM32_REG_IDX_RNG] read_idx_1_d; // Register file read index 1 +wire [`LM32_REG_IDX_RNG] write_idx_d; // Register file write index +reg [`LM32_REG_IDX_RNG] write_idx_x; +reg [`LM32_REG_IDX_RNG] write_idx_m; +reg [`LM32_REG_IDX_RNG] write_idx_w; +wire [`LM32_CSR_RNG] csr_d; // CSR read/write index +reg [`LM32_CSR_RNG] csr_x; +wire [`LM32_CONDITION_RNG] condition_d; // Branch condition +reg [`LM32_CONDITION_RNG] condition_x; +`ifdef CFG_DEBUG_ENABLED +wire break_d; // Indicates a break instruction +reg break_x; +`endif +wire scall_d; // Indicates a scall instruction +reg scall_x; +wire eret_d; // Indicates an eret instruction +reg eret_x; +wire eret_q_x; +reg eret_m; +`ifdef CFG_TRACE_ENABLED +reg eret_w; +`endif +`ifdef CFG_DEBUG_ENABLED +wire bret_d; // Indicates a bret instruction +reg bret_x; +wire bret_q_x; +reg bret_m; +`ifdef CFG_TRACE_ENABLED +reg bret_w; +`endif +`endif +wire csr_write_enable_d; // CSR write enable +reg csr_write_enable_x; +wire csr_write_enable_q_x; +`ifdef CFG_USER_ENABLED +wire [`LM32_USER_OPCODE_RNG] user_opcode_d; // User-defined instruction opcode +`endif + +`ifdef CFG_BUS_ERRORS_ENABLED +wire bus_error_d; // Indicates an bus error occured while fetching the instruction in this pipeline stage +reg bus_error_x; +reg data_bus_error_exception_m; +reg [`LM32_PC_RNG] memop_pc_w; +`endif + +reg [`LM32_WORD_RNG] d_result_0; // Result of instruction in D stage (operand 0) +reg [`LM32_WORD_RNG] d_result_1; // Result of instruction in D stage (operand 1) +reg [`LM32_WORD_RNG] x_result; // Result of instruction in X stage +reg [`LM32_WORD_RNG] 
m_result; // Result of instruction in M stage +reg [`LM32_WORD_RNG] w_result; // Result of instruction in W stage + +reg [`LM32_WORD_RNG] operand_0_x; // Operand 0 for X stage instruction +reg [`LM32_WORD_RNG] operand_1_x; // Operand 1 for X stage instruction +reg [`LM32_WORD_RNG] store_operand_x; // Data read from register to store +reg [`LM32_WORD_RNG] operand_m; // Operand for M stage instruction +reg [`LM32_WORD_RNG] operand_w; // Operand for W stage instruction + +// To/from register file +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +reg [`LM32_WORD_RNG] reg_data_live_0; +reg [`LM32_WORD_RNG] reg_data_live_1; +reg use_buf; // Whether to use reg_data_live or reg_data_buf +reg [`LM32_WORD_RNG] reg_data_buf_0; +reg [`LM32_WORD_RNG] reg_data_buf_1; +`endif +`ifdef LM32_EBR_REGISTER_FILE +`else +reg [`LM32_WORD_RNG] registers[0:(1<<`LM32_REG_IDX_WIDTH)-1]; // Register file +`endif +wire [`LM32_WORD_RNG] reg_data_0; // Register file read port 0 data +wire [`LM32_WORD_RNG] reg_data_1; // Register file read port 1 data +reg [`LM32_WORD_RNG] bypass_data_0; // Register value 0 after bypassing +reg [`LM32_WORD_RNG] bypass_data_1; // Register value 1 after bypassing +wire reg_write_enable_q_w; + +reg interlock; // Indicates pipeline should be stalled because of a read-after-write hazzard + +wire stall_a; // Stall instruction in A pipeline stage +wire stall_f; // Stall instruction in F pipeline stage +wire stall_d; // Stall instruction in D pipeline stage +wire stall_x; // Stall instruction in X pipeline stage +wire stall_m; // Stall instruction in M pipeline stage + +// To/from adder +wire adder_op_d; // Whether to add or subtract +reg adder_op_x; +reg adder_op_x_n; // Inverted version of adder_op_x +wire [`LM32_WORD_RNG] adder_result_x; // Result from adder +wire adder_overflow_x; // Whether a signed overflow occured +wire adder_carry_n_x; // Whether a carry was generated + +// To/from logical operations unit +wire [`LM32_LOGIC_OP_RNG] logic_op_d; // Which operation to perform 
+reg [`LM32_LOGIC_OP_RNG] logic_op_x; +wire [`LM32_WORD_RNG] logic_result_x; // Result of logical operation + +`ifdef CFG_SIGN_EXTEND_ENABLED +// From sign-extension unit +wire [`LM32_WORD_RNG] sextb_result_x; // Result of byte sign-extension +wire [`LM32_WORD_RNG] sexth_result_x; // Result of half-word sign-extenstion +wire [`LM32_WORD_RNG] sext_result_x; // Result of sign-extension specified by instruction +`endif + +// To/from shifter +`ifdef CFG_PL_BARREL_SHIFT_ENABLED +`ifdef CFG_ROTATE_ENABLED +wire rotate_d; // Whether we should rotate or shift +reg rotate_x; +`endif +wire direction_d; // Which direction to shift in +reg direction_x; +wire [`LM32_WORD_RNG] shifter_result_m; // Result of shifter +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +wire shift_left_d; // Indicates whether to perform a left shift or not +wire shift_left_q_d; +wire shift_right_d; // Indicates whether to perform a right shift or not +wire shift_right_q_d; +`endif +`ifdef LM32_NO_BARREL_SHIFT +wire [`LM32_WORD_RNG] shifter_result_x; // Result of single-bit right shifter +`endif + +// To/from multiplier +`ifdef LM32_MULTIPLY_ENABLED +wire [`LM32_WORD_RNG] multiplier_result_w; // Result from multiplier +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED +wire multiply_d; // Indicates whether to perform a multiply or not +wire multiply_q_d; +`endif + +// To/from divider +`ifdef CFG_MC_DIVIDE_ENABLED +wire divide_d; // Indicates whether to perform a divider or not +wire divide_q_d; +wire modulus_d; +wire modulus_q_d; +wire divide_by_zero_x; // Indicates an attempt was made to divide by zero +`endif + +// To from multi-cycle arithmetic unit +`ifdef LM32_MC_ARITHMETIC_ENABLED +wire mc_stall_request_x; // Multi-cycle arithmetic unit stall request +wire [`LM32_WORD_RNG] mc_result_x; +`endif + +// From CSRs +`ifdef CFG_INTERRUPTS_ENABLED +wire [`LM32_WORD_RNG] interrupt_csr_read_data_x;// Data read from interrupt CSRs +`endif +wire [`LM32_WORD_RNG] cfg; // Configuration CSR +wire [`LM32_WORD_RNG] cfg2; // 
Extended Configuration CSR +`ifdef CFG_CYCLE_COUNTER_ENABLED +reg [`LM32_WORD_RNG] cc; // Cycle counter CSR +`endif +reg [`LM32_WORD_RNG] csr_read_data_x; // Data read from CSRs + +// To/from instruction unit +wire [`LM32_PC_RNG] pc_f; // PC of instruction in F stage +wire [`LM32_PC_RNG] pc_d; // PC of instruction in D stage +wire [`LM32_PC_RNG] pc_x; // PC of instruction in X stage +wire [`LM32_PC_RNG] pc_m; // PC of instruction in M stage +wire [`LM32_PC_RNG] pc_w; // PC of instruction in W stage +`ifdef CFG_TRACE_ENABLED +reg [`LM32_PC_RNG] pc_c; // PC of last commited instruction +`endif +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +wire [`LM32_INSTRUCTION_RNG] instruction_f; // Instruction in F stage +`endif +//pragma attribute instruction_d preserve_signal true +//pragma attribute instruction_d preserve_driver true +wire [`LM32_INSTRUCTION_RNG] instruction_d; // Instruction in D stage +`ifdef CFG_ICACHE_ENABLED +wire iflush; // Flush instruction cache +wire icache_stall_request; // Stall pipeline because instruction cache is busy +wire icache_restart_request; // Restart instruction that caused an instruction cache miss +wire icache_refill_request; // Request to refill instruction cache +wire icache_refilling; // Indicates the instruction cache is being refilled +`endif +`ifdef CFG_IROM_ENABLED +wire [`LM32_WORD_RNG] irom_store_data_m; // Store data to instruction ROM +wire [`LM32_WORD_RNG] irom_address_xm; // Address to instruction ROM from load-store unit +wire [`LM32_WORD_RNG] irom_data_m; // Load data from instruction ROM +wire irom_we_xm; // Indicates data needs to be written to instruction ROM +wire irom_stall_request_x; // Indicates D stage needs to be stalled on a store to instruction ROM +`endif + +// To/from load/store unit +`ifdef CFG_DCACHE_ENABLED +wire dflush_x; // Flush data cache +reg dflush_m; +wire dcache_stall_request; // Stall pipeline because data cache is busy +wire dcache_restart_request; // Restart instruction that caused a data cache miss 
+wire dcache_refill_request; // Request to refill data cache +wire dcache_refilling; // Indicates the data cache is being refilled +`endif +wire [`LM32_WORD_RNG] load_data_w; // Result of a load instruction +wire stall_wb_load; // Stall pipeline because of a load via the data Wishbone interface + +// To/from JTAG interface +`ifdef CFG_JTAG_ENABLED +`ifdef CFG_JTAG_UART_ENABLED +wire [`LM32_WORD_RNG] jtx_csr_read_data; // Read data for JTX CSR +wire [`LM32_WORD_RNG] jrx_csr_read_data; // Read data for JRX CSR +`endif +`ifdef CFG_HW_DEBUG_ENABLED +wire jtag_csr_write_enable; // Debugger CSR write enable +wire [`LM32_WORD_RNG] jtag_csr_write_data; // Data to write to specified CSR +wire [`LM32_CSR_RNG] jtag_csr; // Which CSR to write +wire jtag_read_enable; +wire [`LM32_BYTE_RNG] jtag_read_data; +wire jtag_write_enable; +wire [`LM32_BYTE_RNG] jtag_write_data; +wire [`LM32_WORD_RNG] jtag_address; +wire jtag_access_complete; +`endif +`ifdef CFG_DEBUG_ENABLED +wire jtag_break; // Request from debugger to raise a breakpoint +`endif +`endif + +// Hazard detection +wire raw_x_0; // RAW hazard between instruction in X stage and read port 0 +wire raw_x_1; // RAW hazard between instruction in X stage and read port 1 +wire raw_m_0; // RAW hazard between instruction in M stage and read port 0 +wire raw_m_1; // RAW hazard between instruction in M stage and read port 1 +wire raw_w_0; // RAW hazard between instruction in W stage and read port 0 +wire raw_w_1; // RAW hazard between instruction in W stage and read port 1 + +// Control flow +wire cmp_zero; // Result of comparison is zero +wire cmp_negative; // Result of comparison is negative +wire cmp_overflow; // Comparison produced an overflow +wire cmp_carry_n; // Comparison produced a carry, inverted +reg condition_met_x; // Condition of branch instruction is met +reg condition_met_m; +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH +wire branch_taken_x; // Branch is taken in X stage +`endif +wire branch_taken_m; // Branch is taken 
in M stage + +wire kill_f; // Kill instruction in F stage +wire kill_d; // Kill instruction in D stage +wire kill_x; // Kill instruction in X stage +wire kill_m; // Kill instruction in M stage +wire kill_w; // Kill instruction in W stage + +reg [`LM32_PC_WIDTH+2-1:8] eba; // Exception Base Address (EBA) CSR +`ifdef CFG_DEBUG_ENABLED +reg [`LM32_PC_WIDTH+2-1:8] deba; // Debug Exception Base Address (DEBA) CSR +`endif +reg [`LM32_EID_RNG] eid_x; // Exception ID in X stage +`ifdef CFG_TRACE_ENABLED +reg [`LM32_EID_RNG] eid_m; // Exception ID in M stage +reg [`LM32_EID_RNG] eid_w; // Exception ID in W stage +`endif + +`ifdef CFG_DEBUG_ENABLED +`ifdef LM32_SINGLE_STEP_ENABLED +wire dc_ss; // Is single-step enabled +`endif +wire dc_re; // Remap all exceptions +wire exception_x; // An exception occurred in the X stage +reg exception_m; // An instruction that caused an exception is in the M stage +wire debug_exception_x; // Indicates if a debug exception has occurred +reg debug_exception_m; +reg debug_exception_w; +wire debug_exception_q_w; +wire non_debug_exception_x; // Indicates if a non debug exception has occurred +reg non_debug_exception_m; +reg non_debug_exception_w; +wire non_debug_exception_q_w; +`else +wire exception_x; // An exception occurred in the X stage +reg exception_m; +reg exception_w; +wire exception_q_w; +`endif + +`ifdef CFG_DEBUG_ENABLED +wire reset_exception; // Indicates if a reset exception has occurred +`endif +`ifdef CFG_INTERRUPTS_ENABLED +wire interrupt_exception; // Indicates if an interrupt exception has occurred +`endif +`ifdef CFG_DEBUG_ENABLED +wire breakpoint_exception; // Indicates if a breakpoint exception has occurred +wire watchpoint_exception; // Indicates if a watchpoint exception has occurred +`endif +`ifdef CFG_BUS_ERRORS_ENABLED +wire instruction_bus_error_exception; // Indicates if an instruction bus error exception has occurred +wire data_bus_error_exception; // Indicates if a data bus error exception has occurred +`endif +`ifdef 
CFG_MC_DIVIDE_ENABLED +wire divide_by_zero_exception; // Indicates if a divide by zero exception has occured +`endif +wire system_call_exception; // Indicates if a system call exception has occured + +`ifdef CFG_BUS_ERRORS_ENABLED +reg data_bus_error_seen; // Indicates if a data bus error was seen +`endif + +`ifdef CFG_EXTERNAL_BREAK_ENABLED +reg ext_break_r; +`endif + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + +// Instruction unit +lm32_instruction_unit #( + .associativity (icache_associativity), + .sets (icache_sets), + .bytes_per_line (icache_bytes_per_line), + .base_address (icache_base_address), + .limit (icache_limit) + ) instruction_unit ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + .at_debug (at_debug), + `endif +`endif + // From pipeline + .stall_a (stall_a), + .stall_f (stall_f), + .stall_d (stall_d), + .stall_x (stall_x), + .stall_m (stall_m), + .valid_f (valid_f), + .valid_d (valid_d), + .kill_f (kill_f), + .branch_predict_taken_d (branch_predict_taken_d), + .branch_predict_address_d (branch_predict_address_d), +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + .branch_taken_x (branch_taken_x), + .branch_target_x (branch_target_x), +`endif + .exception_m (exception_m), + .branch_taken_m (branch_taken_m), + .branch_mispredict_taken_m (branch_mispredict_taken_m), + .branch_target_m (branch_target_m), +`ifdef CFG_ICACHE_ENABLED + .iflush (iflush), +`endif +`ifdef CFG_IROM_ENABLED + .irom_store_data_m (irom_store_data_m), + .irom_address_xm (irom_address_xm), + .irom_we_xm (irom_we_xm), +`endif +`ifdef CFG_DCACHE_ENABLED + .dcache_restart_request (dcache_restart_request), + .dcache_refill_request (dcache_refill_request), + .dcache_refilling 
(dcache_refilling), +`endif +`ifdef CFG_IWB_ENABLED + // From Wishbone + .i_dat_i (I_DAT_I), + .i_ack_i (I_ACK_I), + .i_err_i (I_ERR_I), +`endif +`ifdef CFG_HW_DEBUG_ENABLED + .jtag_read_enable (jtag_read_enable), + .jtag_write_enable (jtag_write_enable), + .jtag_write_data (jtag_write_data), + .jtag_address (jtag_address), +`endif + // ----- Outputs ------- + // To pipeline + .pc_f (pc_f), + .pc_d (pc_d), + .pc_x (pc_x), + .pc_m (pc_m), + .pc_w (pc_w), +`ifdef CFG_ICACHE_ENABLED + .icache_stall_request (icache_stall_request), + .icache_restart_request (icache_restart_request), + .icache_refill_request (icache_refill_request), + .icache_refilling (icache_refilling), +`endif +`ifdef CFG_IROM_ENABLED + .irom_data_m (irom_data_m), +`endif +`ifdef CFG_IWB_ENABLED + // To Wishbone + .i_dat_o (I_DAT_O), + .i_adr_o (I_ADR_O), + .i_cyc_o (I_CYC_O), + .i_sel_o (I_SEL_O), + .i_stb_o (I_STB_O), + .i_we_o (I_WE_O), + .i_cti_o (I_CTI_O), + .i_lock_o (I_LOCK_O), + .i_bte_o (I_BTE_O), +`endif +`ifdef CFG_HW_DEBUG_ENABLED + .jtag_read_data (jtag_read_data), + .jtag_access_complete (jtag_access_complete), +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + .bus_error_d (bus_error_d), +`endif +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE + .instruction_f (instruction_f), +`endif + .instruction_d (instruction_d) + ); + +// Instruction decoder +lm32_decoder decoder ( + // ----- Inputs ------- + .instruction (instruction_d), + // ----- Outputs ------- + .d_result_sel_0 (d_result_sel_0_d), + .d_result_sel_1 (d_result_sel_1_d), + .x_result_sel_csr (x_result_sel_csr_d), +`ifdef LM32_MC_ARITHMETIC_ENABLED + .x_result_sel_mc_arith (x_result_sel_mc_arith_d), +`endif +`ifdef LM32_NO_BARREL_SHIFT + .x_result_sel_shift (x_result_sel_shift_d), +`endif +`ifdef CFG_SIGN_EXTEND_ENABLED + .x_result_sel_sext (x_result_sel_sext_d), +`endif + .x_result_sel_logic (x_result_sel_logic_d), +`ifdef CFG_USER_ENABLED + .x_result_sel_user (x_result_sel_user_d), +`endif + .x_result_sel_add (x_result_sel_add_d), + 
.m_result_sel_compare (m_result_sel_compare_d), +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + .m_result_sel_shift (m_result_sel_shift_d), +`endif + .w_result_sel_load (w_result_sel_load_d), +`ifdef CFG_PL_MULTIPLY_ENABLED + .w_result_sel_mul (w_result_sel_mul_d), +`endif + .x_bypass_enable (x_bypass_enable_d), + .m_bypass_enable (m_bypass_enable_d), + .read_enable_0 (read_enable_0_d), + .read_idx_0 (read_idx_0_d), + .read_enable_1 (read_enable_1_d), + .read_idx_1 (read_idx_1_d), + .write_enable (write_enable_d), + .write_idx (write_idx_d), + .immediate (immediate_d), + .branch_offset (branch_offset_d), + .load (load_d), + .store (store_d), + .size (size_d), + .sign_extend (sign_extend_d), + .adder_op (adder_op_d), + .logic_op (logic_op_d), +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + .direction (direction_d), +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + .shift_left (shift_left_d), + .shift_right (shift_right_d), +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED + .multiply (multiply_d), +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + .divide (divide_d), + .modulus (modulus_d), +`endif + .branch (branch_d), + .bi_unconditional (bi_unconditional), + .bi_conditional (bi_conditional), + .branch_reg (branch_reg_d), + .condition (condition_d), +`ifdef CFG_DEBUG_ENABLED + .break_opcode (break_d), +`endif + .scall (scall_d), + .eret (eret_d), +`ifdef CFG_DEBUG_ENABLED + .bret (bret_d), +`endif +`ifdef CFG_USER_ENABLED + .user_opcode (user_opcode_d), +`endif + .csr_write_enable (csr_write_enable_d) + ); + +// Load/store unit +lm32_load_store_unit #( + .associativity (dcache_associativity), + .sets (dcache_sets), + .bytes_per_line (dcache_bytes_per_line), + .base_address (dcache_base_address), + .limit (dcache_limit) + ) load_store_unit ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + // From pipeline + .stall_a (stall_a), + .stall_x (stall_x), + .stall_m (stall_m), + .kill_x (kill_x), + .kill_m (kill_m), + .exception_m (exception_m), + .store_operand_x (store_operand_x), + 
.load_store_address_x (adder_result_x), + .load_store_address_m (operand_m), + .load_store_address_w (operand_w[1:0]), + .load_x (load_x), + .store_x (store_x), + .load_q_x (load_q_x), + .store_q_x (store_q_x), + .load_q_m (load_q_m), + .store_q_m (store_q_m), + .sign_extend_x (sign_extend_x), + .size_x (size_x), +`ifdef CFG_DCACHE_ENABLED + .dflush (dflush_m), +`endif +`ifdef CFG_IROM_ENABLED + .irom_data_m (irom_data_m), +`endif + // From Wishbone + .d_dat_i (D_DAT_I), + .d_ack_i (D_ACK_I), + .d_err_i (D_ERR_I), + .d_rty_i (D_RTY_I), + // ----- Outputs ------- + // To pipeline +`ifdef CFG_DCACHE_ENABLED + .dcache_refill_request (dcache_refill_request), + .dcache_restart_request (dcache_restart_request), + .dcache_stall_request (dcache_stall_request), + .dcache_refilling (dcache_refilling), +`endif +`ifdef CFG_IROM_ENABLED + .irom_store_data_m (irom_store_data_m), + .irom_address_xm (irom_address_xm), + .irom_we_xm (irom_we_xm), + .irom_stall_request_x (irom_stall_request_x), +`endif + .load_data_w (load_data_w), + .stall_wb_load (stall_wb_load), + // To Wishbone + .d_dat_o (D_DAT_O), + .d_adr_o (D_ADR_O), + .d_cyc_o (D_CYC_O), + .d_sel_o (D_SEL_O), + .d_stb_o (D_STB_O), + .d_we_o (D_WE_O), + .d_cti_o (D_CTI_O), + .d_lock_o (D_LOCK_O), + .d_bte_o (D_BTE_O) + ); + +// Adder +lm32_adder adder ( + // ----- Inputs ------- + .adder_op_x (adder_op_x), + .adder_op_x_n (adder_op_x_n), + .operand_0_x (operand_0_x), + .operand_1_x (operand_1_x), + // ----- Outputs ------- + .adder_result_x (adder_result_x), + .adder_carry_n_x (adder_carry_n_x), + .adder_overflow_x (adder_overflow_x) + ); + +// Logic operations +lm32_logic_op logic_op ( + // ----- Inputs ------- + .logic_op_x (logic_op_x), + .operand_0_x (operand_0_x), + + .operand_1_x (operand_1_x), + // ----- Outputs ------- + .logic_result_x (logic_result_x) + ); + +`ifdef CFG_PL_BARREL_SHIFT_ENABLED +// Pipelined barrel-shifter +lm32_shifter shifter ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + 
.stall_x (stall_x), + .direction_x (direction_x), + .sign_extend_x (sign_extend_x), + .operand_0_x (operand_0_x), + .operand_1_x (operand_1_x), + // ----- Outputs ------- + .shifter_result_m (shifter_result_m) + ); +`endif + +`ifdef CFG_PL_MULTIPLY_ENABLED +// Pipeline fixed-point multiplier +lm32_multiplier multiplier ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + .stall_x (stall_x), + .stall_m (stall_m), + .operand_0 (d_result_0), + .operand_1 (d_result_1), + // ----- Outputs ------- + .result (multiplier_result_w) + ); +`endif + +`ifdef LM32_MC_ARITHMETIC_ENABLED +// Multi-cycle arithmetic +lm32_mc_arithmetic mc_arithmetic ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + .stall_d (stall_d), + .kill_x (kill_x), +`ifdef CFG_MC_DIVIDE_ENABLED + .divide_d (divide_q_d), + .modulus_d (modulus_q_d), +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED + .multiply_d (multiply_q_d), +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + .shift_left_d (shift_left_q_d), + .shift_right_d (shift_right_q_d), + .sign_extend_d (sign_extend_d), +`endif + .operand_0_d (d_result_0), + .operand_1_d (d_result_1), + // ----- Outputs ------- + .result_x (mc_result_x), +`ifdef CFG_MC_DIVIDE_ENABLED + .divide_by_zero_x (divide_by_zero_x), +`endif + .stall_request_x (mc_stall_request_x) + ); +`endif + +`ifdef CFG_INTERRUPTS_ENABLED +// Interrupt unit +lm32_interrupt interrupt_unit ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + // From external devices + .interrupt (interrupt), + // From pipeline + .stall_x (stall_x), +`ifdef CFG_DEBUG_ENABLED + .non_debug_exception (non_debug_exception_q_w), + .debug_exception (debug_exception_q_w), +`else + .exception (exception_q_w), +`endif + .eret_q_x (eret_q_x), +`ifdef CFG_DEBUG_ENABLED + .bret_q_x (bret_q_x), +`endif + .csr (csr_x), + .csr_write_data (operand_1_x), + .csr_write_enable (csr_write_enable_q_x), + // ----- Outputs ------- + .interrupt_exception (interrupt_exception), + // To pipeline + .csr_read_data 
(interrupt_csr_read_data_x) + ); +`endif + +`ifdef CFG_JTAG_ENABLED +// JTAG interface +lm32_jtag jtag ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + // From JTAG + .jtag_clk (jtag_clk), + .jtag_update (jtag_update), + .jtag_reg_q (jtag_reg_q), + .jtag_reg_addr_q (jtag_reg_addr_q), + // From pipeline +`ifdef CFG_JTAG_UART_ENABLED + .csr (csr_x), + .csr_write_data (operand_1_x), + .csr_write_enable (csr_write_enable_q_x), + .stall_x (stall_x), +`endif +`ifdef CFG_HW_DEBUG_ENABLED + .jtag_read_data (jtag_read_data), + .jtag_access_complete (jtag_access_complete), +`endif +`ifdef CFG_DEBUG_ENABLED + .exception_q_w (debug_exception_q_w || non_debug_exception_q_w), +`endif + // ----- Outputs ------- + // To pipeline +`ifdef CFG_JTAG_UART_ENABLED + .jtx_csr_read_data (jtx_csr_read_data), + .jrx_csr_read_data (jrx_csr_read_data), +`endif +`ifdef CFG_HW_DEBUG_ENABLED + .jtag_csr_write_enable (jtag_csr_write_enable), + .jtag_csr_write_data (jtag_csr_write_data), + .jtag_csr (jtag_csr), + .jtag_read_enable (jtag_read_enable), + .jtag_write_enable (jtag_write_enable), + .jtag_write_data (jtag_write_data), + .jtag_address (jtag_address), +`endif +`ifdef CFG_DEBUG_ENABLED + .jtag_break (jtag_break), + .jtag_reset (reset_exception), +`endif + // To JTAG + .jtag_reg_d (jtag_reg_d), + .jtag_reg_addr_d (jtag_reg_addr_d) + ); +`endif + +`ifdef CFG_DEBUG_ENABLED +// Debug unit +lm32_debug #( + .breakpoints (breakpoints), + .watchpoints (watchpoints) + ) hw_debug ( + // ----- Inputs ------- + .clk_i (clk_i), + .rst_i (rst_i), + .pc_x (pc_x), + .load_x (load_x), + .store_x (store_x), + .load_store_address_x (adder_result_x), + .csr_write_enable_x (csr_write_enable_q_x), + .csr_write_data (operand_1_x), + .csr_x (csr_x), +`ifdef CFG_HW_DEBUG_ENABLED + .jtag_csr_write_enable (jtag_csr_write_enable), + .jtag_csr_write_data (jtag_csr_write_data), + .jtag_csr (jtag_csr), +`endif +`ifdef LM32_SINGLE_STEP_ENABLED + .eret_q_x (eret_q_x), + .bret_q_x (bret_q_x), + .stall_x 
(stall_x), + .exception_x (exception_x), + .q_x (q_x), +`ifdef CFG_DCACHE_ENABLED + .dcache_refill_request (dcache_refill_request), +`endif +`endif + // ----- Outputs ------- +`ifdef LM32_SINGLE_STEP_ENABLED + .dc_ss (dc_ss), +`endif + .dc_re (dc_re), + .bp_match (bp_match), + .wp_match (wp_match) + ); +`endif + +// Register file + +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE + /*---------------------------------------------------------------------- + Register File is implemented using EBRs. There can be three accesses to + the register file in each cycle: two reads and one write. On-chip block + RAM has two read/write ports. To accomodate three accesses, two on-chip + block RAMs are used (each register file "write" is made to both block + RAMs). + + One limitation of the on-chip block RAMs is that one cannot perform a + read and write to same location in a cycle (if this is done, then the + data read out is indeterminate). + ----------------------------------------------------------------------*/ + wire [31:0] regfile_data_0, regfile_data_1; + reg [31:0] w_result_d; + reg regfile_raw_0, regfile_raw_0_nxt; + reg regfile_raw_1, regfile_raw_1_nxt; + + /*---------------------------------------------------------------------- + Check if read and write is being performed to same register in current + cycle? This is done by comparing the read and write IDXs. + ----------------------------------------------------------------------*/ + always @(reg_write_enable_q_w or write_idx_w or instruction_f) + begin + if (reg_write_enable_q_w + && (write_idx_w == instruction_f[25:21])) + regfile_raw_0_nxt = 1'b1; + else + regfile_raw_0_nxt = 1'b0; + + if (reg_write_enable_q_w + && (write_idx_w == instruction_f[20:16])) + regfile_raw_1_nxt = 1'b1; + else + regfile_raw_1_nxt = 1'b0; + end + + /*---------------------------------------------------------------------- + Select latched (delayed) write value or data from register file. 
If + read in previous cycle was performed to register written to in same + cycle, then latched (delayed) write value is selected. + ----------------------------------------------------------------------*/ + always @(regfile_raw_0 or w_result_d or regfile_data_0) + if (regfile_raw_0) + reg_data_live_0 = w_result_d; + else + reg_data_live_0 = regfile_data_0; + + /*---------------------------------------------------------------------- + Select latched (delayed) write value or data from register file. If + read in previous cycle was performed to register written to in same + cycle, then latched (delayed) write value is selected. + ----------------------------------------------------------------------*/ + always @(regfile_raw_1 or w_result_d or regfile_data_1) + if (regfile_raw_1) + reg_data_live_1 = w_result_d; + else + reg_data_live_1 = regfile_data_1; + + /*---------------------------------------------------------------------- + Latch value written to register file + ----------------------------------------------------------------------*/ + always @(posedge clk_i `CFG_RESET_SENSITIVITY) + if (rst_i == `TRUE) + begin + regfile_raw_0 <= 1'b0; + regfile_raw_1 <= 1'b0; + w_result_d <= 32'b0; + end + else + begin + regfile_raw_0 <= regfile_raw_0_nxt; + regfile_raw_1 <= regfile_raw_1_nxt; + w_result_d <= w_result; + end + + /*---------------------------------------------------------------------- + Register file instantiation as Pseudo-Dual Port EBRs. 
+ ----------------------------------------------------------------------*/ + // Modified by GSI: removed non-portable RAM instantiation + lm32_dp_ram + #( + // ----- Parameters ----- + .addr_depth(1<<5), + .addr_width(5), + .data_width(32) + ) + reg_0 + ( + // ----- Inputs ----- + .clk_i (clk_i), + .rst_i (rst_i), + .we_i (reg_write_enable_q_w), + .wdata_i (w_result), + .waddr_i (write_idx_w), + .raddr_i (instruction_f[25:21]), + // ----- Outputs ----- + .rdata_o (regfile_data_0) + ); + + lm32_dp_ram + #( + .addr_depth(1<<5), + .addr_width(5), + .data_width(32) + ) + reg_1 + ( + // ----- Inputs ----- + .clk_i (clk_i), + .rst_i (rst_i), + .we_i (reg_write_enable_q_w), + .wdata_i (w_result), + .waddr_i (write_idx_w), + .raddr_i (instruction_f[20:16]), + // ----- Outputs ----- + .rdata_o (regfile_data_1) + ); +`endif + +`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE + pmi_ram_dp + #( + // ----- Parameters ----- + .pmi_wr_addr_depth(1<<5), + .pmi_wr_addr_width(5), + .pmi_wr_data_width(32), + .pmi_rd_addr_depth(1<<5), + .pmi_rd_addr_width(5), + .pmi_rd_data_width(32), + .pmi_regmode("noreg"), + .pmi_gsr("enable"), + .pmi_resetmode("sync"), + .pmi_init_file("none"), + .pmi_init_file_format("binary"), + .pmi_family(`LATTICE_FAMILY), + .module_type("pmi_ram_dp") + ) + reg_0 + ( + // ----- Inputs ----- + .Data(w_result), + .WrAddress(write_idx_w), + .RdAddress(read_idx_0_d), + .WrClock(clk_i), + .RdClock(clk_n_i), + .WrClockEn(`TRUE), + .RdClockEn(stall_f == `FALSE), + .WE(reg_write_enable_q_w), + .Reset(rst_i), + // ----- Outputs ----- + .Q(reg_data_0) + ); + + pmi_ram_dp + #( + // ----- Parameters ----- + .pmi_wr_addr_depth(1<<5), + .pmi_wr_addr_width(5), + .pmi_wr_data_width(32), + .pmi_rd_addr_depth(1<<5), + .pmi_rd_addr_width(5), + .pmi_rd_data_width(32), + .pmi_regmode("noreg"), + .pmi_gsr("enable"), + .pmi_resetmode("sync"), + .pmi_init_file("none"), + .pmi_init_file_format("binary"), + .pmi_family(`LATTICE_FAMILY), + .module_type("pmi_ram_dp") + ) + reg_1 + ( + // ----- 
Inputs ----- + .Data(w_result), + .WrAddress(write_idx_w), + .RdAddress(read_idx_1_d), + .WrClock(clk_i), + .RdClock(clk_n_i), + .WrClockEn(`TRUE), + .RdClockEn(stall_f == `FALSE), + .WE(reg_write_enable_q_w), + .Reset(rst_i), + // ----- Outputs ----- + .Q(reg_data_1) + ); +`endif + + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +// Select between buffered and live data from register file +assign reg_data_0 = use_buf ? reg_data_buf_0 : reg_data_live_0; +assign reg_data_1 = use_buf ? reg_data_buf_1 : reg_data_live_1; +`endif +`ifdef LM32_EBR_REGISTER_FILE +`else +// Register file read ports +assign reg_data_0 = registers[read_idx_0_d]; +assign reg_data_1 = registers[read_idx_1_d]; +`endif + +// Detect read-after-write hazards +assign raw_x_0 = (write_idx_x == read_idx_0_d) && (write_enable_q_x == `TRUE); +assign raw_m_0 = (write_idx_m == read_idx_0_d) && (write_enable_q_m == `TRUE); +assign raw_w_0 = (write_idx_w == read_idx_0_d) && (write_enable_q_w == `TRUE); +assign raw_x_1 = (write_idx_x == read_idx_1_d) && (write_enable_q_x == `TRUE); +assign raw_m_1 = (write_idx_m == read_idx_1_d) && (write_enable_q_m == `TRUE); +assign raw_w_1 = (write_idx_w == read_idx_1_d) && (write_enable_q_w == `TRUE); + +// Interlock detection - Raise an interlock for RAW hazards +always @(*) +begin + if ( ( (x_bypass_enable_x == `FALSE) + && ( ((read_enable_0_d == `TRUE) && (raw_x_0 == `TRUE)) + || ((read_enable_1_d == `TRUE) && (raw_x_1 == `TRUE)) + ) + ) + || ( (m_bypass_enable_m == `FALSE) + && ( ((read_enable_0_d == `TRUE) && (raw_m_0 == `TRUE)) + || ((read_enable_1_d == `TRUE) && (raw_m_1 == `TRUE)) + ) + ) + ) + interlock = `TRUE; + else + interlock = `FALSE; +end + +// Bypass for reg port 0 +always @(*) +begin + if (raw_x_0 == `TRUE) + bypass_data_0 = x_result; + else if (raw_m_0 == `TRUE) + bypass_data_0 = m_result; + else if (raw_w_0 == `TRUE) +
bypass_data_0 = w_result; + else + bypass_data_0 = reg_data_0; +end + +// Bypass for reg port 1 +always @(*) +begin + if (raw_x_1 == `TRUE) + bypass_data_1 = x_result; + else if (raw_m_1 == `TRUE) + bypass_data_1 = m_result; + else if (raw_w_1 == `TRUE) + bypass_data_1 = w_result; + else + bypass_data_1 = reg_data_1; +end + + /*---------------------------------------------------------------------- + Branch prediction is performed in D stage of pipeline. Only PC-relative + branches are predicted: forward-pointing conditional branches are not- + taken, while backward-pointing conditional branches are taken. + Unconditional branches are always predicted taken! + ----------------------------------------------------------------------*/ + assign branch_predict_d = bi_unconditional | bi_conditional; + assign branch_predict_taken_d = bi_unconditional ? 1'b1 : (bi_conditional ? instruction_d[15] : 1'b0); + + // Compute branch target address: Branch PC PLUS Offset + assign branch_target_d = pc_d + branch_offset_d; + + // Compute fetch address. Address of instruction sequentially after the + // branch if branch is not taken. Target address of branch if branch is + // taken + assign branch_predict_address_d = branch_predict_taken_d ? branch_target_d : pc_f; + +// D stage result selection +always @(*) +begin + d_result_0 = d_result_sel_0_d[0] ?
{pc_f, 2'b00} : bypass_data_0; + case (d_result_sel_1_d) + `LM32_D_RESULT_SEL_1_ZERO: d_result_1 = {`LM32_WORD_WIDTH{1'b0}}; + `LM32_D_RESULT_SEL_1_REG_1: d_result_1 = bypass_data_1; + `LM32_D_RESULT_SEL_1_IMMEDIATE: d_result_1 = immediate_d; + default: d_result_1 = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + +`ifdef CFG_USER_ENABLED +// Operands for user-defined instructions +assign user_operand_0 = operand_0_x; +assign user_operand_1 = operand_1_x; +`endif + +`ifdef CFG_SIGN_EXTEND_ENABLED +// Sign-extension +assign sextb_result_x = {{24{operand_0_x[7]}}, operand_0_x[7:0]}; +assign sexth_result_x = {{16{operand_0_x[15]}}, operand_0_x[15:0]}; +assign sext_result_x = size_x == `LM32_SIZE_BYTE ? sextb_result_x : sexth_result_x; +`endif + +`ifdef LM32_NO_BARREL_SHIFT +// Only single bit shift operations are supported when barrel-shifter isn't implemented +assign shifter_result_x = {operand_0_x[`LM32_WORD_WIDTH-1] & sign_extend_x, operand_0_x[`LM32_WORD_WIDTH-1:1]}; +`endif + +// Condition evaluation +assign cmp_zero = operand_0_x == operand_1_x; +assign cmp_negative = adder_result_x[`LM32_WORD_WIDTH-1]; +assign cmp_overflow = adder_overflow_x; +assign cmp_carry_n = adder_carry_n_x; +always @(*) +begin + case (condition_x) + `LM32_CONDITION_U1: condition_met_x = `TRUE; + `LM32_CONDITION_U2: condition_met_x = `TRUE; + `LM32_CONDITION_E: condition_met_x = cmp_zero; + `LM32_CONDITION_NE: condition_met_x = !cmp_zero; + `LM32_CONDITION_G: condition_met_x = !cmp_zero && (cmp_negative == cmp_overflow); + `LM32_CONDITION_GU: condition_met_x = cmp_carry_n && !cmp_zero; + `LM32_CONDITION_GE: condition_met_x = cmp_negative == cmp_overflow; + `LM32_CONDITION_GEU: condition_met_x = cmp_carry_n; + default: condition_met_x = 1'bx; + endcase +end + +// X stage result selection +always @(*) +begin + x_result = x_result_sel_add_x ? adder_result_x + : x_result_sel_csr_x ? csr_read_data_x +`ifdef CFG_SIGN_EXTEND_ENABLED + : x_result_sel_sext_x ? 
sext_result_x +`endif +`ifdef CFG_USER_ENABLED + : x_result_sel_user_x ? user_result +`endif +`ifdef LM32_NO_BARREL_SHIFT + : x_result_sel_shift_x ? shifter_result_x +`endif +`ifdef LM32_MC_ARITHMETIC_ENABLED + : x_result_sel_mc_arith_x ? mc_result_x +`endif + : logic_result_x; +end + +// M stage result selection +always @(*) +begin + m_result = m_result_sel_compare_m ? {{`LM32_WORD_WIDTH-1{1'b0}}, condition_met_m} +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + : m_result_sel_shift_m ? shifter_result_m +`endif + : operand_m; +end + +// W stage result selection +always @(*) +begin + w_result = w_result_sel_load_w ? load_data_w +`ifdef CFG_PL_MULTIPLY_ENABLED + : w_result_sel_mul_w ? multiplier_result_w +`endif + : operand_w; +end + +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH +// Indicate when a branch should be taken in X stage +assign branch_taken_x = (stall_x == `FALSE) + && ( (branch_x == `TRUE) + && ((condition_x == `LM32_CONDITION_U1) || (condition_x == `LM32_CONDITION_U2)) + && (valid_x == `TRUE) + && (branch_predict_x == `FALSE) + ); +`endif + +// Indicate when a branch should be taken in M stage (exceptions are a type of branch) +assign branch_taken_m = (stall_m == `FALSE) + && ( ( (branch_m == `TRUE) + && (valid_m == `TRUE) + && ( ( (condition_met_m == `TRUE) + && (branch_predict_taken_m == `FALSE) + ) + || ( (condition_met_m == `FALSE) + && (branch_predict_m == `TRUE) + && (branch_predict_taken_m == `TRUE) + ) + ) + ) + || (exception_m == `TRUE) + ); + +// Indicate when a branch in M stage is mispredicted as being taken +assign branch_mispredict_taken_m = (condition_met_m == `FALSE) + && (branch_predict_m == `TRUE) + && (branch_predict_taken_m == `TRUE); + +// Indicate when a branch in M stage will cause flush in X stage +assign branch_flushX_m = (stall_m == `FALSE) + && ( ( (branch_m == `TRUE) + && (valid_m == `TRUE) + && ( (condition_met_m == `TRUE) + || ( (condition_met_m == `FALSE) + && (branch_predict_m == `TRUE) + && (branch_predict_taken_m == `TRUE) + ) + ) + ) + 
|| (exception_m == `TRUE) + ); + +// Generate signal that will kill instructions in each pipeline stage when necessary +assign kill_f = ( (valid_d == `TRUE) + && (branch_predict_taken_d == `TRUE) + ) + || (branch_taken_m == `TRUE) +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + || (branch_taken_x == `TRUE) +`endif +`ifdef CFG_ICACHE_ENABLED + || (icache_refill_request == `TRUE) +`endif +`ifdef CFG_DCACHE_ENABLED + || (dcache_refill_request == `TRUE) +`endif + ; +assign kill_d = (branch_taken_m == `TRUE) +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + || (branch_taken_x == `TRUE) +`endif +`ifdef CFG_ICACHE_ENABLED + || (icache_refill_request == `TRUE) +`endif +`ifdef CFG_DCACHE_ENABLED + || (dcache_refill_request == `TRUE) +`endif + ; +assign kill_x = (branch_flushX_m == `TRUE) +`ifdef CFG_DCACHE_ENABLED + || (dcache_refill_request == `TRUE) +`endif + ; +assign kill_m = `FALSE +`ifdef CFG_DCACHE_ENABLED + || (dcache_refill_request == `TRUE) +`endif + ; +assign kill_w = `FALSE +`ifdef CFG_DCACHE_ENABLED + || (dcache_refill_request == `TRUE) +`endif + ; + +// Exceptions + +`ifdef CFG_DEBUG_ENABLED +assign breakpoint_exception = ( ( (break_x == `TRUE) + || (bp_match == `TRUE) + ) + && (valid_x == `TRUE) + ) +`ifdef CFG_JTAG_ENABLED + || (jtag_break == `TRUE) +`endif +`ifdef CFG_EXTERNAL_BREAK_ENABLED + || (ext_break_r == `TRUE) +`endif + ; +`endif + +`ifdef CFG_DEBUG_ENABLED +assign watchpoint_exception = wp_match == `TRUE; +`endif + +`ifdef CFG_BUS_ERRORS_ENABLED +assign instruction_bus_error_exception = ( (bus_error_x == `TRUE) + && (valid_x == `TRUE) + ); +assign data_bus_error_exception = data_bus_error_seen == `TRUE; +`endif + +`ifdef CFG_MC_DIVIDE_ENABLED +assign divide_by_zero_exception = divide_by_zero_x == `TRUE; +`endif + +assign system_call_exception = ( (scall_x == `TRUE) +`ifdef CFG_BUS_ERRORS_ENABLED + && (valid_x == `TRUE) +`endif + ); + +`ifdef CFG_DEBUG_ENABLED +assign debug_exception_x = (breakpoint_exception == `TRUE) + || (watchpoint_exception == `TRUE) + ; + 
+assign non_debug_exception_x = (system_call_exception == `TRUE) +`ifdef CFG_JTAG_ENABLED + || (reset_exception == `TRUE) +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + || (instruction_bus_error_exception == `TRUE) + || (data_bus_error_exception == `TRUE) +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + || (divide_by_zero_exception == `TRUE) +`endif +`ifdef CFG_INTERRUPTS_ENABLED + || ( (interrupt_exception == `TRUE) +`ifdef LM32_SINGLE_STEP_ENABLED + && (dc_ss == `FALSE) +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + && (store_q_m == `FALSE) + && (D_CYC_O == `FALSE) +`endif + ) +`endif + ; + +assign exception_x = (debug_exception_x == `TRUE) || (non_debug_exception_x == `TRUE); +`else +assign exception_x = (system_call_exception == `TRUE) +`ifdef CFG_BUS_ERRORS_ENABLED + || (instruction_bus_error_exception == `TRUE) + || (data_bus_error_exception == `TRUE) +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + || (divide_by_zero_exception == `TRUE) +`endif +`ifdef CFG_INTERRUPTS_ENABLED + || ( (interrupt_exception == `TRUE) +`ifdef LM32_SINGLE_STEP_ENABLED + && (dc_ss == `FALSE) +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + && (store_q_m == `FALSE) + && (D_CYC_O == `FALSE) +`endif + ) +`endif + ; +`endif + +// Exception ID +always @(*) +begin +`ifdef CFG_DEBUG_ENABLED +`ifdef CFG_JTAG_ENABLED + if (reset_exception == `TRUE) + eid_x = `LM32_EID_RESET; + else +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + if (data_bus_error_exception == `TRUE) + eid_x = `LM32_EID_DATA_BUS_ERROR; + else +`endif + if (breakpoint_exception == `TRUE) + eid_x = `LM32_EID_BREAKPOINT; + else +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + if (data_bus_error_exception == `TRUE) + eid_x = `LM32_EID_DATA_BUS_ERROR; + else + if (instruction_bus_error_exception == `TRUE) + eid_x = `LM32_EID_INST_BUS_ERROR; + else +`endif +`ifdef CFG_DEBUG_ENABLED + if (watchpoint_exception == `TRUE) + eid_x = `LM32_EID_WATCHPOINT; + else +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + if (divide_by_zero_exception == `TRUE) + eid_x = `LM32_EID_DIVIDE_BY_ZERO; + else 
+`endif +`ifdef CFG_INTERRUPTS_ENABLED + if ( (interrupt_exception == `TRUE) +`ifdef LM32_SINGLE_STEP_ENABLED + && (dc_ss == `FALSE) +`endif + ) + eid_x = `LM32_EID_INTERRUPT; + else +`endif + eid_x = `LM32_EID_SCALL; +end + +// Stall generation + +assign stall_a = (stall_f == `TRUE); + +assign stall_f = (stall_d == `TRUE); + +assign stall_d = (stall_x == `TRUE) + || ( (interlock == `TRUE) + && (kill_d == `FALSE) + ) + || ( ( (eret_d == `TRUE) + || (scall_d == `TRUE) +`ifdef CFG_BUS_ERRORS_ENABLED + || (bus_error_d == `TRUE) +`endif + ) + && ( (load_q_x == `TRUE) + || (load_q_m == `TRUE) + || (store_q_x == `TRUE) + || (store_q_m == `TRUE) + || (D_CYC_O == `TRUE) + ) + && (kill_d == `FALSE) + ) +`ifdef CFG_DEBUG_ENABLED + || ( ( (break_d == `TRUE) + || (bret_d == `TRUE) + ) + && ( (load_q_x == `TRUE) + || (store_q_x == `TRUE) + || (load_q_m == `TRUE) + || (store_q_m == `TRUE) + || (D_CYC_O == `TRUE) + ) + && (kill_d == `FALSE) + ) +`endif + || ( (csr_write_enable_d == `TRUE) + && (load_q_x == `TRUE) + ) + ; + +assign stall_x = (stall_m == `TRUE) +`ifdef LM32_MC_ARITHMETIC_ENABLED + || ( (mc_stall_request_x == `TRUE) + && (kill_x == `FALSE) + ) +`endif +`ifdef CFG_IROM_ENABLED + // Stall load/store instruction in D stage if there is an ongoing store + // operation to instruction ROM in M stage + || ( (irom_stall_request_x == `TRUE) + && ( (load_d == `TRUE) + || (store_d == `TRUE) + ) + ) +`endif + ; + +assign stall_m = (stall_wb_load == `TRUE) +`ifdef CFG_SIZE_OVER_SPEED + || (D_CYC_O == `TRUE) +`else + || ( (D_CYC_O == `TRUE) + && ( (store_m == `TRUE) + /* + Bug: Following loop does not allow interrupts to be serviced since + either D_CYC_O or store_m is always high during entire duration of + loop. + L1: addi r1, r1, 1 + sw (r2,0), r1 + bi L1 + + Introduce a single-cycle stall when a wishbone cycle is in progress + and a new store instruction is in Execute stage and an interrupt + exception has occurred.
This stall will ensure that D_CYC_O and + store_m will both be low for one cycle. + */ +`ifdef CFG_INTERRUPTS_ENABLED + || ((store_x == `TRUE) && (interrupt_exception == `TRUE)) +`endif + || (load_m == `TRUE) + || (load_x == `TRUE) + ) + ) +`endif +`ifdef CFG_DCACHE_ENABLED + || (dcache_stall_request == `TRUE) // Need to stall in case a taken branch is in M stage and data cache is only being flushed, so won't be restarted +`endif +`ifdef CFG_ICACHE_ENABLED + || (icache_stall_request == `TRUE) // Pipeline needs to be stalled otherwise branches may be lost + || ((I_CYC_O == `TRUE) && ((branch_m == `TRUE) || (exception_m == `TRUE))) +`else +`ifdef CFG_IWB_ENABLED + || (I_CYC_O == `TRUE) +`endif +`endif +`ifdef CFG_USER_ENABLED + || ( (user_valid == `TRUE) // Stall whole pipeline, rather than just X stage, where the instruction is, so we don't have to worry about exceptions (maybe) + && (user_complete == `FALSE) + ) +`endif + ; + +// Qualify state changing control signals +`ifdef LM32_MC_ARITHMETIC_ENABLED +assign q_d = (valid_d == `TRUE) && (kill_d == `FALSE); +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +assign shift_left_q_d = (shift_left_d == `TRUE) && (q_d == `TRUE); +assign shift_right_q_d = (shift_right_d == `TRUE) && (q_d == `TRUE); +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED +assign multiply_q_d = (multiply_d == `TRUE) && (q_d == `TRUE); +`endif +`ifdef CFG_MC_DIVIDE_ENABLED +assign divide_q_d = (divide_d == `TRUE) && (q_d == `TRUE); +assign modulus_q_d = (modulus_d == `TRUE) && (q_d == `TRUE); +`endif +assign q_x = (valid_x == `TRUE) && (kill_x == `FALSE); +assign csr_write_enable_q_x = (csr_write_enable_x == `TRUE) && (q_x == `TRUE); +assign eret_q_x = (eret_x == `TRUE) && (q_x == `TRUE); +`ifdef CFG_DEBUG_ENABLED +assign bret_q_x = (bret_x == `TRUE) && (q_x == `TRUE); +`endif +assign load_q_x = (load_x == `TRUE) + && (q_x == `TRUE) +`ifdef CFG_DEBUG_ENABLED + && (bp_match == `FALSE) +`endif + ; +assign store_q_x = (store_x == `TRUE) + && (q_x == `TRUE) +`ifdef
CFG_DEBUG_ENABLED + && (bp_match == `FALSE) +`endif + ; +`ifdef CFG_USER_ENABLED +assign user_valid = (x_result_sel_user_x == `TRUE) && (q_x == `TRUE); +`endif +assign q_m = (valid_m == `TRUE) && (kill_m == `FALSE) && (exception_m == `FALSE); +assign load_q_m = (load_m == `TRUE) && (q_m == `TRUE); +assign store_q_m = (store_m == `TRUE) && (q_m == `TRUE); +`ifdef CFG_DEBUG_ENABLED +assign debug_exception_q_w = ((debug_exception_w == `TRUE) && (valid_w == `TRUE)); +assign non_debug_exception_q_w = ((non_debug_exception_w == `TRUE) && (valid_w == `TRUE)); +`else +assign exception_q_w = ((exception_w == `TRUE) && (valid_w == `TRUE)); +`endif +// Don't qualify register write enables with kill, as the signal is needed early, and it doesn't matter if the instruction is killed (except for the actual write - but that is handled separately) +assign write_enable_q_x = (write_enable_x == `TRUE) && (valid_x == `TRUE) && (branch_flushX_m == `FALSE); +assign write_enable_q_m = (write_enable_m == `TRUE) && (valid_m == `TRUE); +assign write_enable_q_w = (write_enable_w == `TRUE) && (valid_w == `TRUE); +// The enable that actually does write the registers needs to be qualified with kill +assign reg_write_enable_q_w = (write_enable_w == `TRUE) && (kill_w == `FALSE) && (valid_w == `TRUE); + +// Configuration (CFG) CSR +assign cfg = { + `LM32_REVISION, + watchpoints[3:0], + breakpoints[3:0], + interrupts[5:0], +`ifdef CFG_JTAG_UART_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_ROM_DEBUG_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_HW_DEBUG_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_DEBUG_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_ICACHE_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_DCACHE_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_CYCLE_COUNTER_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_USER_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_SIGN_EXTEND_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef 
LM32_BARREL_SHIFT_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef LM32_MULTIPLY_ENABLED + `TRUE +`else + `FALSE +`endif + }; + +assign cfg2 = { + 30'b0, +`ifdef CFG_IROM_ENABLED + `TRUE, +`else + `FALSE, +`endif +`ifdef CFG_DRAM_ENABLED + `TRUE +`else + `FALSE +`endif + }; + +// Cache flush +`ifdef CFG_ICACHE_ENABLED +assign iflush = ( (csr_write_enable_d == `TRUE) + && (csr_d == `LM32_CSR_ICC) + && (stall_d == `FALSE) + && (kill_d == `FALSE) + && (valid_d == `TRUE)) +// Added by GSI: needed to flush cache after loading firmware per JTAG +`ifdef CFG_HW_DEBUG_ENABLED + || + ( (jtag_csr_write_enable == `TRUE) + && (jtag_csr == `LM32_CSR_ICC)) +`endif + ; +`endif +`ifdef CFG_DCACHE_ENABLED +assign dflush_x = ( (csr_write_enable_q_x == `TRUE) + && (csr_x == `LM32_CSR_DCC)) +// Added by GSI: needed to flush cache after loading firmware per JTAG +`ifdef CFG_HW_DEBUG_ENABLED + || + ( (jtag_csr_write_enable == `TRUE) + && (jtag_csr == `LM32_CSR_DCC)) +`endif + ; +`endif + +// Extract CSR index +assign csr_d = read_idx_0_d[`LM32_CSR_RNG]; + +// CSR reads +always @(*) +begin + case (csr_x) +`ifdef CFG_INTERRUPTS_ENABLED + `LM32_CSR_IE, + `LM32_CSR_IM, + `LM32_CSR_IP: csr_read_data_x = interrupt_csr_read_data_x; +`endif +`ifdef CFG_CYCLE_COUNTER_ENABLED + `LM32_CSR_CC: csr_read_data_x = cc; +`endif + `LM32_CSR_CFG: csr_read_data_x = cfg; + `LM32_CSR_EBA: csr_read_data_x = {eba, 8'h00}; +`ifdef CFG_DEBUG_ENABLED + `LM32_CSR_DEBA: csr_read_data_x = {deba, 8'h00}; +`endif +`ifdef CFG_JTAG_UART_ENABLED + `LM32_CSR_JTX: csr_read_data_x = jtx_csr_read_data; + `LM32_CSR_JRX: csr_read_data_x = jrx_csr_read_data; +`endif + `LM32_CSR_CFG2: csr_read_data_x = cfg2; + + default: csr_read_data_x = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// Exception Base Address (EBA) CSR +always @(posedge 
clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + eba <= eba_reset[`LM32_PC_WIDTH+2-1:8]; + else + begin + if ((csr_write_enable_q_x == `TRUE) && (csr_x == `LM32_CSR_EBA) && (stall_x == `FALSE)) + eba <= operand_1_x[`LM32_PC_WIDTH+2-1:8]; +`ifdef CFG_HW_DEBUG_ENABLED + if ((jtag_csr_write_enable == `TRUE) && (jtag_csr == `LM32_CSR_EBA)) + eba <= jtag_csr_write_data[`LM32_PC_WIDTH+2-1:8]; +`endif + end +end + +`ifdef CFG_DEBUG_ENABLED +// Debug Exception Base Address (DEBA) CSR +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + deba <= deba_reset[`LM32_PC_WIDTH+2-1:8]; + else + begin + if ((csr_write_enable_q_x == `TRUE) && (csr_x == `LM32_CSR_DEBA) && (stall_x == `FALSE)) + deba <= operand_1_x[`LM32_PC_WIDTH+2-1:8]; +`ifdef CFG_HW_DEBUG_ENABLED + if ((jtag_csr_write_enable == `TRUE) && (jtag_csr == `LM32_CSR_DEBA)) + deba <= jtag_csr_write_data[`LM32_PC_WIDTH+2-1:8]; +`endif + end +end +`endif + +// Cycle Counter (CC) CSR +`ifdef CFG_CYCLE_COUNTER_ENABLED +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + cc <= {`LM32_WORD_WIDTH{1'b0}}; + else + cc <= cc + 1'b1; +end +`endif + +`ifdef CFG_BUS_ERRORS_ENABLED +// Watch for data bus errors +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + data_bus_error_seen <= `FALSE; + else + begin + // Set flag when bus error is detected + if ((D_ERR_I == `TRUE) && (D_CYC_O == `TRUE)) + data_bus_error_seen <= `TRUE; + // Clear flag when exception is taken + if ((exception_m == `TRUE) && (kill_m == `FALSE)) + data_bus_error_seen <= `FALSE; + end +end +`endif + +`ifdef CFG_EXTERNAL_BREAK_ENABLED +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + ext_break_r <= `FALSE; + else + begin + if (ext_break == `TRUE) + ext_break_r <= `TRUE; + if (debug_exception_q_w == `TRUE) + ext_break_r <= `FALSE; + end +end +`endif + +// Valid bits to indicate whether an instruction in a particular pipeline stage is valid or not +
+`ifdef CFG_ICACHE_ENABLED +`ifdef CFG_DCACHE_ENABLED +always @(*) +begin + if ( (icache_refill_request == `TRUE) + || (dcache_refill_request == `TRUE) + ) + valid_a = `FALSE; + else if ( (icache_restart_request == `TRUE) + || (dcache_restart_request == `TRUE) + ) + valid_a = `TRUE; + else + valid_a = !icache_refilling && !dcache_refilling; +end +`else +always @(*) +begin + if (icache_refill_request == `TRUE) + valid_a = `FALSE; + else if (icache_restart_request == `TRUE) + valid_a = `TRUE; + else + valid_a = !icache_refilling; +end +`endif +`else +`ifdef CFG_DCACHE_ENABLED +always @(*) +begin + if (dcache_refill_request == `TRUE) + valid_a = `FALSE; + else if (dcache_restart_request == `TRUE) + valid_a = `TRUE; + else + valid_a = !dcache_refilling; +end +`endif +`endif + +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + valid_f <= `FALSE; + valid_d <= `FALSE; + valid_x <= `FALSE; + valid_m <= `FALSE; + valid_w <= `FALSE; + end + else + begin + if ((kill_f == `TRUE) || (stall_a == `FALSE)) +`ifdef LM32_CACHE_ENABLED + valid_f <= valid_a; +`else + valid_f <= `TRUE; +`endif + else if (stall_f == `FALSE) + valid_f <= `FALSE; + + if (kill_d == `TRUE) + valid_d <= `FALSE; + else if (stall_f == `FALSE) + valid_d <= valid_f & !kill_f; + else if (stall_d == `FALSE) + valid_d <= `FALSE; + + if (stall_d == `FALSE) + valid_x <= valid_d & !kill_d; + else if (kill_x == `TRUE) + valid_x <= `FALSE; + else if (stall_x == `FALSE) + valid_x <= `FALSE; + + if (kill_m == `TRUE) + valid_m <= `FALSE; + else if (stall_x == `FALSE) + valid_m <= valid_x & !kill_x; + else if (stall_m == `FALSE) + valid_m <= `FALSE; + + if (stall_m == `FALSE) + valid_w <= valid_m & !kill_m; + else + valid_w <= `FALSE; + end +end + +// Microcode pipeline registers +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin +`ifdef CFG_USER_ENABLED + user_opcode <= {`LM32_USER_OPCODE_WIDTH{1'b0}}; +`endif + operand_0_x <= {`LM32_WORD_WIDTH{1'b0}}; + 
operand_1_x <= {`LM32_WORD_WIDTH{1'b0}}; + store_operand_x <= {`LM32_WORD_WIDTH{1'b0}}; + branch_target_x <= {`LM32_PC_WIDTH{1'b0}}; + x_result_sel_csr_x <= `FALSE; +`ifdef LM32_MC_ARITHMETIC_ENABLED + x_result_sel_mc_arith_x <= `FALSE; +`endif +`ifdef LM32_NO_BARREL_SHIFT + x_result_sel_shift_x <= `FALSE; +`endif +`ifdef CFG_SIGN_EXTEND_ENABLED + x_result_sel_sext_x <= `FALSE; +`endif + x_result_sel_logic_x <= `FALSE; +`ifdef CFG_USER_ENABLED + x_result_sel_user_x <= `FALSE; +`endif + x_result_sel_add_x <= `FALSE; + m_result_sel_compare_x <= `FALSE; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + m_result_sel_shift_x <= `FALSE; +`endif + w_result_sel_load_x <= `FALSE; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_x <= `FALSE; +`endif + x_bypass_enable_x <= `FALSE; + m_bypass_enable_x <= `FALSE; + write_enable_x <= `FALSE; + write_idx_x <= {`LM32_REG_IDX_WIDTH{1'b0}}; + csr_x <= {`LM32_CSR_WIDTH{1'b0}}; + load_x <= `FALSE; + store_x <= `FALSE; + size_x <= {`LM32_SIZE_WIDTH{1'b0}}; + sign_extend_x <= `FALSE; + adder_op_x <= `FALSE; + adder_op_x_n <= `FALSE; + logic_op_x <= 4'h0; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + direction_x <= `FALSE; +`endif +`ifdef CFG_ROTATE_ENABLED + rotate_x <= `FALSE; + +`endif + branch_x <= `FALSE; + branch_predict_x <= `FALSE; + branch_predict_taken_x <= `FALSE; + condition_x <= `LM32_CONDITION_U1; +`ifdef CFG_DEBUG_ENABLED + break_x <= `FALSE; +`endif + scall_x <= `FALSE; + eret_x <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + bret_x <= `FALSE; +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_x <= `FALSE; + data_bus_error_exception_m <= `FALSE; +`endif + csr_write_enable_x <= `FALSE; + operand_m <= {`LM32_WORD_WIDTH{1'b0}}; + branch_target_m <= {`LM32_PC_WIDTH{1'b0}}; + m_result_sel_compare_m <= `FALSE; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + m_result_sel_shift_m <= `FALSE; +`endif + w_result_sel_load_m <= `FALSE; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_m <= `FALSE; +`endif + m_bypass_enable_m <= `FALSE; + branch_m <= `FALSE; + 
branch_predict_m <= `FALSE; + branch_predict_taken_m <= `FALSE; + exception_m <= `FALSE; + load_m <= `FALSE; + store_m <= `FALSE; + write_enable_m <= `FALSE; + write_idx_m <= {`LM32_REG_IDX_WIDTH{1'b0}}; + condition_met_m <= `FALSE; +`ifdef CFG_DCACHE_ENABLED + dflush_m <= `FALSE; +`endif +`ifdef CFG_DEBUG_ENABLED + debug_exception_m <= `FALSE; + non_debug_exception_m <= `FALSE; +`endif + operand_w <= {`LM32_WORD_WIDTH{1'b0}}; + w_result_sel_load_w <= `FALSE; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_w <= `FALSE; +`endif + write_idx_w <= {`LM32_REG_IDX_WIDTH{1'b0}}; + write_enable_w <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + debug_exception_w <= `FALSE; + non_debug_exception_w <= `FALSE; +`else + exception_w <= `FALSE; +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + memop_pc_w <= {`LM32_PC_WIDTH{1'b0}}; +`endif + end + else + begin + // D/X stage registers + + if (stall_x == `FALSE) + begin +`ifdef CFG_USER_ENABLED + user_opcode <= user_opcode_d; +`endif + operand_0_x <= d_result_0; + operand_1_x <= d_result_1; + store_operand_x <= bypass_data_1; + branch_target_x <= branch_reg_d == `TRUE ? 
bypass_data_0[`LM32_PC_RNG] : branch_target_d; + x_result_sel_csr_x <= x_result_sel_csr_d; +`ifdef LM32_MC_ARITHMETIC_ENABLED + x_result_sel_mc_arith_x <= x_result_sel_mc_arith_d; +`endif +`ifdef LM32_NO_BARREL_SHIFT + x_result_sel_shift_x <= x_result_sel_shift_d; +`endif +`ifdef CFG_SIGN_EXTEND_ENABLED + x_result_sel_sext_x <= x_result_sel_sext_d; +`endif + x_result_sel_logic_x <= x_result_sel_logic_d; +`ifdef CFG_USER_ENABLED + x_result_sel_user_x <= x_result_sel_user_d; +`endif + x_result_sel_add_x <= x_result_sel_add_d; + m_result_sel_compare_x <= m_result_sel_compare_d; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + m_result_sel_shift_x <= m_result_sel_shift_d; +`endif + w_result_sel_load_x <= w_result_sel_load_d; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_x <= w_result_sel_mul_d; +`endif + x_bypass_enable_x <= x_bypass_enable_d; + m_bypass_enable_x <= m_bypass_enable_d; + load_x <= load_d; + store_x <= store_d; + branch_x <= branch_d; + branch_predict_x <= branch_predict_d; + branch_predict_taken_x <= branch_predict_taken_d; + write_idx_x <= write_idx_d; + csr_x <= csr_d; + size_x <= size_d; + sign_extend_x <= sign_extend_d; + adder_op_x <= adder_op_d; + adder_op_x_n <= ~adder_op_d; + logic_op_x <= logic_op_d; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + direction_x <= direction_d; +`endif +`ifdef CFG_ROTATE_ENABLED + rotate_x <= rotate_d; +`endif + condition_x <= condition_d; + csr_write_enable_x <= csr_write_enable_d; +`ifdef CFG_DEBUG_ENABLED + break_x <= break_d; +`endif + scall_x <= scall_d; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_x <= bus_error_d; +`endif + eret_x <= eret_d; +`ifdef CFG_DEBUG_ENABLED + bret_x <= bret_d; +`endif + write_enable_x <= write_enable_d; + end + + // X/M stage registers + + if (stall_m == `FALSE) + begin + operand_m <= x_result; + m_result_sel_compare_m <= m_result_sel_compare_x; +`ifdef CFG_PL_BARREL_SHIFT_ENABLED + m_result_sel_shift_m <= m_result_sel_shift_x; +`endif + if (exception_x == `TRUE) + begin + w_result_sel_load_m <= 
`FALSE; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_m <= `FALSE; +`endif + end + else + begin + w_result_sel_load_m <= w_result_sel_load_x; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_m <= w_result_sel_mul_x; +`endif + end + m_bypass_enable_m <= m_bypass_enable_x; + load_m <= load_x; + store_m <= store_x; +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + branch_m <= branch_x && !branch_taken_x; +`else + branch_m <= branch_x; + branch_predict_m <= branch_predict_x; + branch_predict_taken_m <= branch_predict_taken_x; +`endif +`ifdef CFG_DEBUG_ENABLED + // Data bus errors are generated by the wishbone and are + // made known to the processor only in next cycle (as a + // non-debug exception). A break instruction can be seen + // in same cycle (causing a debug exception). Handle non + // -debug exception first! + if (non_debug_exception_x == `TRUE) + write_idx_m <= `LM32_EA_REG; + else if (debug_exception_x == `TRUE) + write_idx_m <= `LM32_BA_REG; + else + write_idx_m <= write_idx_x; +`else + if (exception_x == `TRUE) + write_idx_m <= `LM32_EA_REG; + else + write_idx_m <= write_idx_x; +`endif + condition_met_m <= condition_met_x; +`ifdef CFG_DEBUG_ENABLED + if (exception_x == `TRUE) + if ((dc_re == `TRUE) +`ifdef CFG_ALTERNATE_EBA + || (at_debug == `TRUE) +`endif + + || ((debug_exception_x == `TRUE) + && (non_debug_exception_x == `FALSE))) + branch_target_m <= {deba, eid_x, {3{1'b0}}}; + else + branch_target_m <= {eba, eid_x, {3{1'b0}}}; + else + branch_target_m <= branch_target_x; +`else + branch_target_m <= exception_x == `TRUE ? {eba, eid_x, {3{1'b0}}} : branch_target_x; +`endif +`ifdef CFG_TRACE_ENABLED + eid_m <= eid_x; +`endif +`ifdef CFG_DCACHE_ENABLED + dflush_m <= dflush_x; +`endif + eret_m <= eret_q_x; +`ifdef CFG_DEBUG_ENABLED + bret_m <= bret_q_x; +`endif + write_enable_m <= exception_x == `TRUE ? 
`TRUE : write_enable_x; +`ifdef CFG_DEBUG_ENABLED + debug_exception_m <= debug_exception_x; + non_debug_exception_m <= non_debug_exception_x; +`endif + end + + // State changing regs + if (stall_m == `FALSE) + begin + if ((exception_x == `TRUE) && (q_x == `TRUE) && (stall_x == `FALSE)) + exception_m <= `TRUE; + else + exception_m <= `FALSE; +`ifdef CFG_BUS_ERRORS_ENABLED + data_bus_error_exception_m <= (data_bus_error_exception == `TRUE) +`ifdef CFG_DEBUG_ENABLED + && (reset_exception == `FALSE) +`endif + ; +`endif + end + + // M/W stage registers +`ifdef CFG_BUS_ERRORS_ENABLED + operand_w <= exception_m == `TRUE ? (data_bus_error_exception_m ? {memop_pc_w, 2'b00} : {pc_m, 2'b00}) : m_result; +`else + operand_w <= exception_m == `TRUE ? {pc_m, 2'b00} : m_result; +`endif + w_result_sel_load_w <= w_result_sel_load_m; +`ifdef CFG_PL_MULTIPLY_ENABLED + w_result_sel_mul_w <= w_result_sel_mul_m; +`endif + write_idx_w <= write_idx_m; +`ifdef CFG_TRACE_ENABLED + eid_w <= eid_m; + eret_w <= eret_m; +`ifdef CFG_DEBUG_ENABLED + bret_w <= bret_m; +`endif +`endif + write_enable_w <= write_enable_m; +`ifdef CFG_DEBUG_ENABLED + debug_exception_w <= debug_exception_m; + non_debug_exception_w <= non_debug_exception_m; +`else + exception_w <= exception_m; +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + if ( (stall_m == `FALSE) + && (data_bus_error_exception == `FALSE) + && ( (load_q_m == `TRUE) + || (store_q_m == `TRUE) + ) + ) + memop_pc_w <= pc_m; +`endif + end +end + +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +// Buffer data read from register file, in case a stall occurs, and watch for +// any writes to the modified registers +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + use_buf <= `FALSE; + reg_data_buf_0 <= {`LM32_WORD_WIDTH{1'b0}}; + reg_data_buf_1 <= {`LM32_WORD_WIDTH{1'b0}}; + end + else + begin + if (stall_d == `FALSE) + use_buf <= `FALSE; + else if (use_buf == `FALSE) + begin + reg_data_buf_0 <= reg_data_live_0; + reg_data_buf_1 <= 
reg_data_live_1; + use_buf <= `TRUE; + end + if (reg_write_enable_q_w == `TRUE) + begin + if (write_idx_w == read_idx_0_d) + reg_data_buf_0 <= w_result; + if (write_idx_w == read_idx_1_d) + reg_data_buf_1 <= w_result; + end + end +end +`endif + +`ifdef LM32_EBR_REGISTER_FILE +`else +// Register file write port +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) begin + registers[0] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[1] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[2] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[3] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[4] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[5] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[6] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[7] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[8] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[9] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[10] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[11] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[12] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[13] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[14] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[15] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[16] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[17] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[18] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[19] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[20] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[21] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[22] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[23] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[24] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[25] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[26] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[27] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[28] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[29] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[30] <= {`LM32_WORD_WIDTH{1'b0}}; + registers[31] <= {`LM32_WORD_WIDTH{1'b0}}; + end + else begin + if (reg_write_enable_q_w == `TRUE) + registers[write_idx_w] <= w_result; + end +end +`endif + +`ifdef CFG_TRACE_ENABLED +// PC tracing logic +always 
@(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + trace_pc_valid <= `FALSE; + trace_pc <= {`LM32_PC_WIDTH{1'b0}}; + trace_exception <= `FALSE; + trace_eid <= `LM32_EID_RESET; + trace_eret <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + trace_bret <= `FALSE; +`endif + pc_c <= `CFG_EBA_RESET/4; + end + else + begin + trace_pc_valid <= `FALSE; + // Has an exception occurred +`ifdef CFG_DEBUG_ENABLED + if ((debug_exception_q_w == `TRUE) || (non_debug_exception_q_w == `TRUE)) +`else + if (exception_q_w == `TRUE) +`endif + begin + trace_exception <= `TRUE; + trace_pc_valid <= `TRUE; + trace_pc <= pc_w; + trace_eid <= eid_w; + end + else + trace_exception <= `FALSE; + + if ((valid_w == `TRUE) && (!kill_w)) + begin + // An instruction is committing. Determine if it is non-sequential + if (pc_c + 1'b1 != pc_w) + begin + // Non-sequential instruction + trace_pc_valid <= `TRUE; + trace_pc <= pc_w; + end + // Record PC so we can determine if next instruction is sequential or not + pc_c <= pc_w; + // Indicate if it was an eret/bret instruction + trace_eret <= eret_w; +`ifdef CFG_DEBUG_ENABLED + trace_bret <= bret_w; +`endif + end + else + begin + trace_eret <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + trace_bret <= `FALSE; +`endif + end + end +end +`endif + +endmodule diff --git a/verilog/lm32/lm32_dcache.v b/verilog/lm32/lm32_dcache.v new file mode 100644 index 00000000..71e4c0bf --- /dev/null +++ b/verilog/lm32/lm32_dcache.v @@ -0,0 +1,527 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. 
+// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_dcache.v +// Title : Data cache +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : Support for user-selected resource usage when implementing +// : cache memory. 
Additional parameters must be defined when +// : invoking lm32_ram.v +// ============================================================================= + +`include "lm32_include.v" + +`ifdef CFG_DCACHE_ENABLED + +`define LM32_DC_ADDR_OFFSET_RNG addr_offset_msb:addr_offset_lsb +`define LM32_DC_ADDR_SET_RNG addr_set_msb:addr_set_lsb +`define LM32_DC_ADDR_TAG_RNG addr_tag_msb:addr_tag_lsb +`define LM32_DC_ADDR_IDX_RNG addr_set_msb:addr_offset_lsb + +`define LM32_DC_TMEM_ADDR_WIDTH addr_set_width +`define LM32_DC_TMEM_ADDR_RNG (`LM32_DC_TMEM_ADDR_WIDTH-1):0 +`define LM32_DC_DMEM_ADDR_WIDTH (addr_offset_width+addr_set_width) +`define LM32_DC_DMEM_ADDR_RNG (`LM32_DC_DMEM_ADDR_WIDTH-1):0 + +`define LM32_DC_TAGS_WIDTH (addr_tag_width+1) +`define LM32_DC_TAGS_RNG (`LM32_DC_TAGS_WIDTH-1):0 +`define LM32_DC_TAGS_TAG_RNG (`LM32_DC_TAGS_WIDTH-1):1 +`define LM32_DC_TAGS_VALID_RNG 0 + +`define LM32_DC_STATE_RNG 2:0 +`define LM32_DC_STATE_FLUSH 3'b001 +`define LM32_DC_STATE_CHECK 3'b010 +`define LM32_DC_STATE_REFILL 3'b100 + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_dcache ( + // ----- Inputs ----- + clk_i, + rst_i, + stall_a, + stall_x, + stall_m, + address_x, + address_m, + load_q_m, + store_q_m, + store_data, + store_byte_select, + refill_ready, + refill_data, + dflush, + // ----- Outputs ----- + stall_request, + restart_request, + refill_request, + refill_address, + refilling, + load_data + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter associativity = 1; // Associativity of the cache (Number of ways) +parameter sets = 512; // Number of sets +parameter bytes_per_line = 16; // Number of bytes per cache line +parameter base_address = 0; // Base address of cachable memory +parameter limit = 0; // Limit (highest address) of cachable memory + +localparam addr_offset_width = 
clogb2(bytes_per_line)-1-2; +localparam addr_set_width = clogb2(sets)-1; +localparam addr_offset_lsb = 2; +localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1); +localparam addr_set_lsb = (addr_offset_msb+1); +localparam addr_set_msb = (addr_set_lsb+addr_set_width-1); +localparam addr_tag_lsb = (addr_set_msb+1); +localparam addr_tag_msb = clogb2(`CFG_DCACHE_LIMIT-`CFG_DCACHE_BASE_ADDRESS)-1; +localparam addr_tag_width = (addr_tag_msb-addr_tag_lsb+1); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input stall_a; // Stall A stage +input stall_x; // Stall X stage +input stall_m; // Stall M stage + +input [`LM32_WORD_RNG] address_x; // X stage load/store address +input [`LM32_WORD_RNG] address_m; // M stage load/store address +input load_q_m; // Load instruction in M stage +input store_q_m; // Store instruction in M stage +input [`LM32_WORD_RNG] store_data; // Data to store +input [`LM32_BYTE_SELECT_RNG] store_byte_select; // Which bytes in store data should be modified + +input refill_ready; // Indicates next word of refill data is ready +input [`LM32_WORD_RNG] refill_data; // Refill data + +input dflush; // Indicates cache should be flushed + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output stall_request; // Request pipeline be stalled because cache is busy +wire stall_request; +output restart_request; // Request to restart instruction that caused the cache miss +reg restart_request; +output refill_request; // Request a refill +reg refill_request; +output [`LM32_WORD_RNG] refill_address; // Address to refill from +reg [`LM32_WORD_RNG] refill_address; +output refilling; // Indicates if the cache is currently refilling +reg refilling; +output [`LM32_WORD_RNG] load_data; // Data read from cache +wire [`LM32_WORD_RNG] load_data; + 
+///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +wire read_port_enable; // Cache memory read port clock enable +wire write_port_enable; // Cache memory write port clock enable +wire [0:associativity-1] way_tmem_we; // Tag memory write enable +wire [0:associativity-1] way_dmem_we; // Data memory write enable +wire [`LM32_WORD_RNG] way_data[0:associativity-1]; // Data read from data memory +wire [`LM32_DC_TAGS_TAG_RNG] way_tag[0:associativity-1];// Tag read from tag memory +wire [0:associativity-1] way_valid; // Indicates which ways are valid +wire [0:associativity-1] way_match; // Indicates which ways matched +wire miss; // Indicates no ways matched + +wire [`LM32_DC_TMEM_ADDR_RNG] tmem_read_address; // Tag memory read address +wire [`LM32_DC_TMEM_ADDR_RNG] tmem_write_address; // Tag memory write address +wire [`LM32_DC_DMEM_ADDR_RNG] dmem_read_address; // Data memory read address +wire [`LM32_DC_DMEM_ADDR_RNG] dmem_write_address; // Data memory write address +wire [`LM32_DC_TAGS_RNG] tmem_write_data; // Tag memory write data +reg [`LM32_WORD_RNG] dmem_write_data; // Data memory write data + +reg [`LM32_DC_STATE_RNG] state; // Current state of FSM +wire flushing; // Indicates if cache is currently flushing +wire check; // Indicates if cache is currently checking for hits/misses +wire refill; // Indicates if cache is currently refilling + +wire valid_store; // Indicates if there is a valid store instruction +reg [associativity-1:0] refill_way_select; // Which way should be refilled +reg [`LM32_DC_ADDR_OFFSET_RNG] refill_offset; // Which word in cache line should be refilled +wire last_refill; // Indicates when on last cycle of cache refill +reg [`LM32_DC_TMEM_ADDR_RNG] flush_set; // Which set is currently being flushed + +genvar i, j; + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include 
"lm32_functions.v" + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + + generate + for (i = 0; i < associativity; i = i + 1) + begin : memories + // Way data + if (`LM32_DC_DMEM_ADDR_WIDTH < 11) + begin : data_memories + lm32_ram + #( + // ----- Parameters ------- + .data_width (32), + .address_width (`LM32_DC_DMEM_ADDR_WIDTH) +// Modified for Milkymist: removed non-portable RAM parameters + ) way_0_data_ram + ( + // ----- Inputs ------- + .read_clk (clk_i), + .write_clk (clk_i), + .reset (rst_i), + .read_address (dmem_read_address), + .enable_read (read_port_enable), + .write_address (dmem_write_address), + .enable_write (write_port_enable), + .write_enable (way_dmem_we[i]), + .write_data (dmem_write_data), + // ----- Outputs ------- + .read_data (way_data[i]) + ); + end + else + begin + for (j = 0; j < 4; j = j + 1) + begin : byte_memories + lm32_ram + #( + // ----- Parameters ------- + .data_width (8), + .address_width (`LM32_DC_DMEM_ADDR_WIDTH) +// Modified for Milkymist: removed non-portable RAM parameters + ) way_0_data_ram + ( + // ----- Inputs ------- + .read_clk (clk_i), + .write_clk (clk_i), + .reset (rst_i), + .read_address (dmem_read_address), + .enable_read (read_port_enable), + .write_address (dmem_write_address), + .enable_write (write_port_enable), + .write_enable (way_dmem_we[i] & (store_byte_select[j] | refill)), + .write_data (dmem_write_data[(j+1)*8-1:j*8]), + // ----- Outputs ------- + .read_data (way_data[i][(j+1)*8-1:j*8]) + ); + end + end + + // Way tags + lm32_ram + #( + // ----- Parameters ------- + .data_width (`LM32_DC_TAGS_WIDTH), + .address_width (`LM32_DC_TMEM_ADDR_WIDTH) +// Modified for Milkymist: removed non-portable RAM parameters + ) way_0_tag_ram + ( + // ----- Inputs ------- + .read_clk (clk_i), + .write_clk (clk_i), + .reset (rst_i), + .read_address (tmem_read_address), + .enable_read (read_port_enable), + .write_address (tmem_write_address), + 
.enable_write (`TRUE), + .write_enable (way_tmem_we[i]), + .write_data (tmem_write_data), + // ----- Outputs ------- + .read_data ({way_tag[i], way_valid[i]}) + ); + end + + endgenerate + +///////////////////////////////////////////////////// +// Combinational logic +///////////////////////////////////////////////////// + +// Compute which ways in the cache match the address being read +generate + for (i = 0; i < associativity; i = i + 1) + begin : match +assign way_match[i] = ({way_tag[i], way_valid[i]} == {address_m[`LM32_DC_ADDR_TAG_RNG], `TRUE}); + end +endgenerate + +// Select data from way that matched the address being read +generate + if (associativity == 1) + begin : data_1 +assign load_data = way_data[0]; + end + else if (associativity == 2) + begin : data_2 +assign load_data = way_match[0] ? way_data[0] : way_data[1]; + end +endgenerate + +generate + if (`LM32_DC_DMEM_ADDR_WIDTH < 11) + begin +// Select data to write to data memories +always @(*) +begin + if (refill == `TRUE) + dmem_write_data = refill_data; + else + begin + dmem_write_data[`LM32_BYTE_0_RNG] = store_byte_select[0] ? store_data[`LM32_BYTE_0_RNG] : load_data[`LM32_BYTE_0_RNG]; + dmem_write_data[`LM32_BYTE_1_RNG] = store_byte_select[1] ? store_data[`LM32_BYTE_1_RNG] : load_data[`LM32_BYTE_1_RNG]; + dmem_write_data[`LM32_BYTE_2_RNG] = store_byte_select[2] ? store_data[`LM32_BYTE_2_RNG] : load_data[`LM32_BYTE_2_RNG]; + dmem_write_data[`LM32_BYTE_3_RNG] = store_byte_select[3] ? store_data[`LM32_BYTE_3_RNG] : load_data[`LM32_BYTE_3_RNG]; + end +end + end + else + begin +// Select data to write to data memories - FIXME: Should use different write ports on dual port RAMs, but they don't work +always @(*) +begin + if (refill == `TRUE) + dmem_write_data = refill_data; + else + dmem_write_data = store_data; +end + end +endgenerate + +// Compute address to use to index into the data memories +generate + if (bytes_per_line > 4) +assign dmem_write_address = (refill == `TRUE) + ? 
{refill_address[`LM32_DC_ADDR_SET_RNG], refill_offset} + : address_m[`LM32_DC_ADDR_IDX_RNG]; + else +assign dmem_write_address = (refill == `TRUE) + ? refill_address[`LM32_DC_ADDR_SET_RNG] + : address_m[`LM32_DC_ADDR_IDX_RNG]; +endgenerate +assign dmem_read_address = address_x[`LM32_DC_ADDR_IDX_RNG]; +// Compute address to use to index into the tag memories +assign tmem_write_address = (flushing == `TRUE) + ? flush_set + : refill_address[`LM32_DC_ADDR_SET_RNG]; +assign tmem_read_address = address_x[`LM32_DC_ADDR_SET_RNG]; + +// Compute signal to indicate when we are on the last refill accesses +generate + if (bytes_per_line > 4) +assign last_refill = refill_offset == {addr_offset_width{1'b1}}; + else +assign last_refill = `TRUE; +endgenerate + +// Compute data and tag memory access enable +assign read_port_enable = (stall_x == `FALSE); +assign write_port_enable = (refill_ready == `TRUE) || !stall_m; + +// Determine when we have a valid store +assign valid_store = (store_q_m == `TRUE) && (check == `TRUE); + +// Compute data and tag memory write enables +generate + if (associativity == 1) + begin : we_1 +assign way_dmem_we[0] = (refill_ready == `TRUE) || ((valid_store == `TRUE) && (way_match[0] == `TRUE)); +assign way_tmem_we[0] = (refill_ready == `TRUE) || (flushing == `TRUE); + end + else + begin : we_2 +assign way_dmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || ((valid_store == `TRUE) && (way_match[0] == `TRUE)); +assign way_dmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || ((valid_store == `TRUE) && (way_match[1] == `TRUE)); +assign way_tmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || (flushing == `TRUE); +assign way_tmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || (flushing == `TRUE); + end +endgenerate + +// On the last refill cycle set the valid bit, for all other writes it should be cleared +assign tmem_write_data[`LM32_DC_TAGS_VALID_RNG] = 
((last_refill == `TRUE) || (valid_store == `TRUE)) && (flushing == `FALSE); +assign tmem_write_data[`LM32_DC_TAGS_TAG_RNG] = refill_address[`LM32_DC_ADDR_TAG_RNG]; + +// Signals that indicate which state we are in +assign flushing = state[0]; +assign check = state[1]; +assign refill = state[2]; + +assign miss = (~(|way_match)) && (load_q_m == `TRUE) && (stall_m == `FALSE); +assign stall_request = (check == `FALSE); + +///////////////////////////////////////////////////// +// Sequential logic +///////////////////////////////////////////////////// + +// Record way selected for replacement on a cache miss +generate + if (associativity >= 2) + begin : way_select +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refill_way_select <= {{associativity-1{1'b0}}, 1'b1}; + else + begin + if (refill_request == `TRUE) + refill_way_select <= {refill_way_select[0], refill_way_select[1]}; + end +end + end +endgenerate + +// Record whether we are currently refilling +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refilling <= `FALSE; + else + refilling <= refill; +end + +// Data cache control FSM +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + state <= `LM32_DC_STATE_FLUSH; + flush_set <= {`LM32_DC_TMEM_ADDR_WIDTH{1'b1}}; + refill_request <= `FALSE; + refill_address <= {`LM32_WORD_WIDTH{1'bx}}; + restart_request <= `FALSE; + end + else + begin + case (state) + + // Flush the cache + `LM32_DC_STATE_FLUSH: + begin + if (flush_set == {`LM32_DC_TMEM_ADDR_WIDTH{1'b0}}) + state <= `LM32_DC_STATE_CHECK; + flush_set <= flush_set - 1'b1; + end + + // Check for cache misses + `LM32_DC_STATE_CHECK: + begin + if (stall_a == `FALSE) + restart_request <= `FALSE; + if (miss == `TRUE) + begin + refill_request <= `TRUE; + refill_address <= address_m; + state <= `LM32_DC_STATE_REFILL; + end + else if (dflush == `TRUE) + state <= `LM32_DC_STATE_FLUSH; + end + + // Refill a cache line + 
`LM32_DC_STATE_REFILL: + begin + refill_request <= `FALSE; + if (refill_ready == `TRUE) + begin + if (last_refill == `TRUE) + begin + restart_request <= `TRUE; + state <= `LM32_DC_STATE_CHECK; + end + end + end + + endcase + end +end + +generate + if (bytes_per_line > 4) + begin +// Refill offset +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refill_offset <= {addr_offset_width{1'b0}}; + else + begin + case (state) + + // Check for cache misses + `LM32_DC_STATE_CHECK: + begin + if (miss == `TRUE) + refill_offset <= {addr_offset_width{1'b0}}; + end + + // Refill a cache line + `LM32_DC_STATE_REFILL: + begin + if (refill_ready == `TRUE) + refill_offset <= refill_offset + 1'b1; + end + + endcase + end +end + end +endgenerate + +endmodule + +`endif + diff --git a/verilog/lm32/lm32_debug.v b/verilog/lm32/lm32_debug.v new file mode 100644 index 00000000..90c8d20b --- /dev/null +++ b/verilog/lm32/lm32_debug.v @@ -0,0 +1,369 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_debug.v +// Title : Hardware debug registers and associated logic. +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// Version : 3.2 +// : Fixed simulation bug which flares up when number of +// : watchpoints is zero. +// ============================================================================= + +`include "lm32_include.v" + +`ifdef CFG_DEBUG_ENABLED + +// States for single-step FSM +`define LM32_DEBUG_SS_STATE_RNG 2:0 +`define LM32_DEBUG_SS_STATE_IDLE 3'b000 +`define LM32_DEBUG_SS_STATE_WAIT_FOR_RET 3'b001 +`define LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN 3'b010 +`define LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT 3'b011 +`define LM32_DEBUG_SS_STATE_RESTART 3'b100 + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_debug ( + // ----- Inputs ------- + clk_i, + rst_i, + pc_x, + load_x, + store_x, + load_store_address_x, + csr_write_enable_x, + csr_write_data, + csr_x, +`ifdef CFG_HW_DEBUG_ENABLED + jtag_csr_write_enable, + jtag_csr_write_data, + jtag_csr, +`endif +`ifdef LM32_SINGLE_STEP_ENABLED + eret_q_x, + bret_q_x, + stall_x, + exception_x, + q_x, +`ifdef CFG_DCACHE_ENABLED + dcache_refill_request, +`endif +`endif + // ----- Outputs ------- +`ifdef LM32_SINGLE_STEP_ENABLED + dc_ss, +`endif + dc_re, + bp_match, + wp_match + ); + +///////////////////////////////////////////////////// +// Parameters 
+///////////////////////////////////////////////////// + +parameter breakpoints = 0; // Number of breakpoint CSRs +parameter watchpoints = 0; // Number of watchpoint CSRs + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input [`LM32_PC_RNG] pc_x; // X stage PC +input load_x; // Load instruction in X stage +input store_x; // Store instruction in X stage +input [`LM32_WORD_RNG] load_store_address_x; // Load or store effective address +input csr_write_enable_x; // wcsr instruction in X stage +input [`LM32_WORD_RNG] csr_write_data; // Data to write to CSR +input [`LM32_CSR_RNG] csr_x; // Which CSR to write +`ifdef CFG_HW_DEBUG_ENABLED +input jtag_csr_write_enable; // JTAG interface CSR write enable +input [`LM32_WORD_RNG] jtag_csr_write_data; // Data to write to CSR +input [`LM32_CSR_RNG] jtag_csr; // Which CSR to write +`endif +`ifdef LM32_SINGLE_STEP_ENABLED +input eret_q_x; // eret instruction in X stage +input bret_q_x; // bret instruction in X stage +input stall_x; // Instruction in X stage is stalled +input exception_x; // An exception has occurred in X stage +input q_x; // Indicates the instruction in the X stage is qualified +`ifdef CFG_DCACHE_ENABLED +input dcache_refill_request; // Indicates data cache wants to be refilled +`endif +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +`ifdef LM32_SINGLE_STEP_ENABLED +output dc_ss; // Single-step enable +reg dc_ss; +`endif +output dc_re; // Remap exceptions +reg dc_re; +output bp_match; // Indicates a breakpoint has matched +wire bp_match; +output wp_match; // Indicates a watchpoint has matched +wire wp_match; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +genvar i; // Loop index for generate statements + +// 
Debug CSRs + +reg [`LM32_PC_RNG] bp_a[0:breakpoints-1]; // Instruction breakpoint address +reg bp_e[0:breakpoints-1]; // Instruction breakpoint enable +wire [0:breakpoints-1]bp_match_n; // Indicates if a h/w instruction breakpoint matched + +reg [`LM32_WPC_C_RNG] wpc_c[0:watchpoints-1]; // Watchpoint control: bit 0 enables matching on loads, bit 1 on stores +reg [`LM32_WORD_RNG] wp[0:watchpoints-1]; // Watchpoint address +wire [0:watchpoints]wp_match_n; // Indicates if a h/w data watchpoint matched. NOTE(review): bit [watchpoints] has no driver in this module - confirm the extra bit is intentional + +wire debug_csr_write_enable; // Debug CSR write enable (from either a wcsr instruction or an external debugger) +wire [`LM32_WORD_RNG] debug_csr_write_data; // Data to write to debug CSR +wire [`LM32_CSR_RNG] debug_csr; // Debug CSR to write to + +`ifdef LM32_SINGLE_STEP_ENABLED +// FIXME: Declaring this as a reg causes ModelSim 6.1.15b to crash, so use integer for now +//reg [`LM32_DEBUG_SS_STATE_RNG] state; // State of single-step FSM +integer state; // State of single-step FSM +`endif + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +// Check for breakpoints +generate + for (i = 0; i < breakpoints; i = i + 1) + begin : bp_comb +assign bp_match_n[i] = ((bp_a[i] == pc_x) && (bp_e[i] == `TRUE)); + end +endgenerate +generate +`ifdef LM32_SINGLE_STEP_ENABLED + if (breakpoints > 0) +assign bp_match = (|bp_match_n) || (state == `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT); + else +assign bp_match = state == `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT; +`else + if (breakpoints > 0) +assign bp_match = |bp_match_n; + else +assign bp_match = `FALSE; +`endif +endgenerate + +// Check for watchpoints +generate + for (i = 0; i < watchpoints; i = i + 1) + begin : wp_comb +assign wp_match_n[i] = (wp[i] == load_store_address_x) && ((load_x & wpc_c[i][0]) | (store_x & wpc_c[i][1])); + end +endgenerate 
+generate + if (watchpoints > 0) +assign wp_match = |wp_match_n; + else +assign wp_match = `FALSE; +endgenerate + +`ifdef CFG_HW_DEBUG_ENABLED +// Multiplex between wcsr instruction writes and debugger writes to the debug CSRs +assign debug_csr_write_enable = (csr_write_enable_x == `TRUE) || (jtag_csr_write_enable == `TRUE); +assign debug_csr_write_data = jtag_csr_write_enable == `TRUE ? jtag_csr_write_data : csr_write_data; +assign debug_csr = jtag_csr_write_enable == `TRUE ? jtag_csr : csr_x; +`else +assign debug_csr_write_enable = csr_write_enable_x; +assign debug_csr_write_data = csr_write_data; +assign debug_csr = csr_x; +`endif + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// Breakpoint address and enable CSRs +generate + for (i = 0; i < breakpoints; i = i + 1) + begin : bp_seq +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + bp_a[i] <= {`LM32_PC_WIDTH{1'bx}}; + bp_e[i] <= `FALSE; + end + else + begin + if ((debug_csr_write_enable == `TRUE) && (debug_csr == `LM32_CSR_BP0 + i)) + begin + bp_a[i] <= debug_csr_write_data[`LM32_PC_RNG]; + bp_e[i] <= debug_csr_write_data[0]; + end + end +end + end +endgenerate + +// Watchpoint address and control flags CSRs +generate + for (i = 0; i < watchpoints; i = i + 1) + begin : wp_seq +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + wp[i] <= {`LM32_WORD_WIDTH{1'bx}}; + wpc_c[i] <= `LM32_WPC_C_DISABLED; + end + else + begin + if (debug_csr_write_enable == `TRUE) + begin + if (debug_csr == `LM32_CSR_DC) + wpc_c[i] <= debug_csr_write_data[3+i*2:2+i*2]; + if (debug_csr == `LM32_CSR_WP0 + i) + wp[i] <= debug_csr_write_data; + end + end +end + end +endgenerate + +// Remap exceptions control bit +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + dc_re <= `FALSE; + else + begin + if ((debug_csr_write_enable == `TRUE) && (debug_csr == 
`LM32_CSR_DC)) + dc_re <= debug_csr_write_data[1]; + end +end + +`ifdef LM32_SINGLE_STEP_ENABLED +// Single-step control flag +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + state <= `LM32_DEBUG_SS_STATE_IDLE; + dc_ss <= `FALSE; + end + else + begin + if ((debug_csr_write_enable == `TRUE) && (debug_csr == `LM32_CSR_DC)) + begin + dc_ss <= debug_csr_write_data[0]; + if (debug_csr_write_data[0] == `FALSE) + state <= `LM32_DEBUG_SS_STATE_IDLE; + else + state <= `LM32_DEBUG_SS_STATE_WAIT_FOR_RET; + end + case (state) + `LM32_DEBUG_SS_STATE_WAIT_FOR_RET: + begin + // Wait for eret or bret instruction to be executed + if ( ( (eret_q_x == `TRUE) + || (bret_q_x == `TRUE) + ) + && (stall_x == `FALSE) + ) + state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN; + end + `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN: + begin + // Wait for an instruction to be executed + if ((q_x == `TRUE) && (stall_x == `FALSE)) + state <= `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT; + end + `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT: + begin + // Wait for exception to be raised +`ifdef CFG_DCACHE_ENABLED + if (dcache_refill_request == `TRUE) + state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN; + else +`endif + if ((exception_x == `TRUE) && (q_x == `TRUE) && (stall_x == `FALSE)) + begin + dc_ss <= `FALSE; + state <= `LM32_DEBUG_SS_STATE_RESTART; + end + end + `LM32_DEBUG_SS_STATE_RESTART: + begin + // Watch to see if stepped instruction is restarted due to a cache miss +`ifdef CFG_DCACHE_ENABLED + if (dcache_refill_request == `TRUE) + state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN; + else +`endif + state <= `LM32_DEBUG_SS_STATE_IDLE; + end + endcase + end +end +`endif + +endmodule + +`endif diff --git a/verilog/lm32/lm32_decoder.v b/verilog/lm32/lm32_decoder.v new file mode 100644 index 00000000..eebe5c3e --- /dev/null +++ b/verilog/lm32/lm32_decoder.v @@ -0,0 +1,604 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE 
<<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_decoder.v +// Title : Instruction decoder +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : Support for static branch prediction. Information about +// : branch type is generated and passed on to the predictor. +// Version : 3.2 +// : No change +// Version : 3.3 +// : Renamed port names that conflict with keywords reserved +// : in System-Verilog. 
+// =============================================================================
+
+`include "lm32_include.v"
+
+// Index of opcode field in an instruction: the full 6-bit field, and the same field without bit 31
+`define LM32_OPCODE_RNG         31:26
+`define LM32_OP_RNG             30:26
+
+// Opcodes - Some are only listed as 5 bits as their MSB is a don't care (5-bit values are matched against LM32_OP_RNG, 6-bit values against LM32_OPCODE_RNG)
+`define LM32_OPCODE_ADD         5'b01101
+`define LM32_OPCODE_AND         5'b01000
+`define LM32_OPCODE_ANDHI       6'b011000
+`define LM32_OPCODE_B           6'b110000
+`define LM32_OPCODE_BI          6'b111000
+`define LM32_OPCODE_BE          6'b010001
+`define LM32_OPCODE_BG          6'b010010
+`define LM32_OPCODE_BGE         6'b010011
+`define LM32_OPCODE_BGEU        6'b010100
+`define LM32_OPCODE_BGU         6'b010101
+`define LM32_OPCODE_BNE         6'b010111
+`define LM32_OPCODE_CALL        6'b110110
+`define LM32_OPCODE_CALLI       6'b111110
+`define LM32_OPCODE_CMPE        5'b11001
+`define LM32_OPCODE_CMPG        5'b11010
+`define LM32_OPCODE_CMPGE       5'b11011
+`define LM32_OPCODE_CMPGEU      5'b11100
+`define LM32_OPCODE_CMPGU       5'b11101
+`define LM32_OPCODE_CMPNE       5'b11111
+`define LM32_OPCODE_DIVU        6'b100011
+`define LM32_OPCODE_LB          6'b000100
+`define LM32_OPCODE_LBU         6'b010000
+`define LM32_OPCODE_LH          6'b000111
+`define LM32_OPCODE_LHU         6'b001011
+`define LM32_OPCODE_LW          6'b001010
+`define LM32_OPCODE_MODU        6'b110001
+`define LM32_OPCODE_MUL         5'b00010
+`define LM32_OPCODE_NOR         5'b00001
+`define LM32_OPCODE_OR          5'b01110
+`define LM32_OPCODE_ORHI        6'b011110
+`define LM32_OPCODE_RAISE       6'b101011
+`define LM32_OPCODE_RCSR        6'b100100
+`define LM32_OPCODE_SB          6'b001100
+`define LM32_OPCODE_SEXTB       6'b101100
+`define LM32_OPCODE_SEXTH       6'b110111
+`define LM32_OPCODE_SH          6'b000011
+`define LM32_OPCODE_SL          5'b01111
+`define LM32_OPCODE_SR          5'b00101
+`define LM32_OPCODE_SRU         5'b00000
+`define LM32_OPCODE_SUB         6'b110010
+`define LM32_OPCODE_SW          6'b010110
+`define LM32_OPCODE_USER        6'b110011
+`define LM32_OPCODE_WCSR        6'b110100
+`define LM32_OPCODE_XNOR        5'b01001
+`define LM32_OPCODE_XOR         5'b00110
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_decoder ( // Purely combinational: decodes one instruction word into pipeline control signals
+    // ----- Inputs -------
+    instruction,
+    // ----- Outputs -------
+    d_result_sel_0,
+    d_result_sel_1,
+    x_result_sel_csr,
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+    x_result_sel_mc_arith,
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+    x_result_sel_shift,
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    x_result_sel_sext,
+`endif
+    x_result_sel_logic,
+`ifdef CFG_USER_ENABLED
+    x_result_sel_user,
+`endif
+    x_result_sel_add,
+    m_result_sel_compare,
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    m_result_sel_shift,
+`endif
+    w_result_sel_load,
+`ifdef CFG_PL_MULTIPLY_ENABLED
+    w_result_sel_mul,
+`endif
+    x_bypass_enable,
+    m_bypass_enable,
+    read_enable_0,
+    read_idx_0,
+    read_enable_1,
+    read_idx_1,
+    write_enable,
+    write_idx,
+    immediate,
+    branch_offset,
+    load,
+    store,
+    size,
+    sign_extend,
+    adder_op,
+    logic_op,
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    direction,
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    shift_left,
+    shift_right,
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    multiply,
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+    divide,
+    modulus,
+`endif
+    branch,
+    branch_reg,
+    condition,
+    bi_conditional,
+    bi_unconditional,
+`ifdef CFG_DEBUG_ENABLED
+    break_opcode,
+`endif
+    scall,
+    eret,
+`ifdef CFG_DEBUG_ENABLED
+    bret,
+`endif
+`ifdef CFG_USER_ENABLED
+    user_opcode,
+`endif
+    csr_write_enable
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input [`LM32_INSTRUCTION_RNG] instruction;       // Instruction to decode
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0; // D-stage operand 0 select: next PC for calls, otherwise register operand 0
+reg    [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0;
+output [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1; // D-stage operand 1 select: zero for calls, immediate when bit 31 is clear and not a branch, otherwise register operand 1
+reg    [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1;
+output x_result_sel_csr;                         // x_result_sel_*: exactly one of these is asserted per instruction (see the X stage chain below)
+reg    x_result_sel_csr;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+output x_result_sel_mc_arith;
+reg    x_result_sel_mc_arith;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+output x_result_sel_shift;
+reg    x_result_sel_shift;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+output x_result_sel_sext;
+reg    x_result_sel_sext;
+`endif
+output x_result_sel_logic;
+reg    x_result_sel_logic;
+`ifdef CFG_USER_ENABLED
+output x_result_sel_user;
+reg    x_result_sel_user;
+`endif
+output x_result_sel_add;
+reg    x_result_sel_add;
+output m_result_sel_compare;                     // Asserted for the cmp* instructions
+reg    m_result_sel_compare;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+output m_result_sel_shift;
+reg    m_result_sel_shift;
+`endif
+output w_result_sel_load;                        // Asserted for the load instructions
+reg    w_result_sel_load;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+output w_result_sel_mul;
+reg    w_result_sel_mul;
+`endif
+output x_bypass_enable;
+wire   x_bypass_enable;
+output m_bypass_enable;
+wire   m_bypass_enable;
+output read_enable_0;
+wire   read_enable_0;
+output [`LM32_REG_IDX_RNG] read_idx_0;
+wire   [`LM32_REG_IDX_RNG] read_idx_0;
+output read_enable_1;
+wire   read_enable_1;
+output [`LM32_REG_IDX_RNG] read_idx_1;
+wire   [`LM32_REG_IDX_RNG] read_idx_1;
+output write_enable;
+wire   write_enable;
+output [`LM32_REG_IDX_RNG] write_idx;
+wire   [`LM32_REG_IDX_RNG] write_idx;
+output [`LM32_WORD_RNG] immediate;
+wire   [`LM32_WORD_RNG] immediate;
+output [`LM32_PC_RNG] branch_offset;
+wire   [`LM32_PC_RNG] branch_offset;
+output load;
+wire   load;
+output store;
+wire   store;
+output [`LM32_SIZE_RNG] size;
+wire   [`LM32_SIZE_RNG] size;
+output sign_extend;
+wire   sign_extend;
+output adder_op;
+wire   adder_op;
+output [`LM32_LOGIC_OP_RNG] logic_op;
+wire   [`LM32_LOGIC_OP_RNG] logic_op;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+output direction;
+wire   direction;
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+output shift_left;
+wire   shift_left;
+output shift_right;
+wire   shift_right;
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+output multiply;
+wire   multiply;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+output divide;
+wire   divide;
+output modulus;
+wire   modulus;
+`endif
+output branch;
+wire   branch;
+output branch_reg;
+wire   branch_reg;
+output [`LM32_CONDITION_RNG] condition;
+wire   [`LM32_CONDITION_RNG] condition;
+output bi_conditional;
+wire   bi_conditional;
+output bi_unconditional;
+wire   bi_unconditional;
+`ifdef CFG_DEBUG_ENABLED
+output break_opcode;
+wire   break_opcode;
+`endif
+output scall;
+wire   scall;
+output eret;
+wire   eret;
+`ifdef CFG_DEBUG_ENABLED
+output bret;
+wire   bret;
+`endif
+`ifdef CFG_USER_ENABLED
+output [`LM32_USER_OPCODE_RNG] user_opcode;
+wire   [`LM32_USER_OPCODE_RNG] user_opcode;
+`endif
+output csr_write_enable;
+wire   csr_write_enable;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers
+/////////////////////////////////////////////////////
+
+wire [`LM32_WORD_RNG] extended_immediate;       // Zero or sign extended immediate
+wire [`LM32_WORD_RNG] high_immediate;           // Immediate as high 16 bits
+wire [`LM32_WORD_RNG] call_immediate;           // Call immediate
+wire [`LM32_WORD_RNG] branch_immediate;         // Conditional branch immediate
+wire sign_extend_immediate;                     // Whether the immediate should be sign extended (`TRUE) or zero extended (`FALSE)
+wire select_high_immediate;                     // Whether to select the high immediate
+wire select_call_immediate;                     // Whether to select the call immediate
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Combinational logic
+/////////////////////////////////////////////////////
+
+// Determine opcode (the op_* nets and the group nets below are not declared in this module, so they rely on Verilog implicit 1-bit net declaration)
+assign op_add    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_ADD;
+assign op_and    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_AND;
+assign op_andhi  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_ANDHI;
+assign op_b      = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_B;
+assign op_bi     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BI;
+assign op_be     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BE;
+assign op_bg     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BG;
+assign op_bge    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGE;
+assign op_bgeu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGEU;
+assign op_bgu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGU;
+assign op_bne    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BNE;
+assign op_call   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_CALL;
+assign op_calli  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_CALLI;
+assign op_cmpe   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPE;
+assign op_cmpg   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPG;
+assign op_cmpge  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGE;
+assign op_cmpgeu = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGEU;
+assign op_cmpgu  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGU;
+assign op_cmpne  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPNE;
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign op_divu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_DIVU;
+`endif
+assign op_lb     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LB;
+assign op_lbu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LBU;
+assign op_lh     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LH;
+assign op_lhu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LHU;
+assign op_lw     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LW;
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign op_modu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_MODU;
+`endif
+`ifdef LM32_MULTIPLY_ENABLED
+assign op_mul    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_MUL;
+`endif
+assign op_nor    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_NOR;
+assign op_or     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_OR;
+assign op_orhi   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_ORHI;
+assign op_raise  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_RAISE;
+assign op_rcsr   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_RCSR;
+assign op_sb     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SB;
+`ifdef CFG_SIGN_EXTEND_ENABLED
+assign op_sextb  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SEXTB;
+assign op_sexth  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SEXTH;
+`endif
+assign op_sh     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SH;
+`ifdef LM32_BARREL_SHIFT_ENABLED
+assign op_sl     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SL;
+`endif
+assign op_sr     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SR;
+assign op_sru    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SRU;
+assign op_sub    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SUB;
+assign op_sw     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SW;
+assign op_user   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_USER;
+assign op_wcsr   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_WCSR;
+assign op_xnor   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_XNOR;
+assign op_xor    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_XOR;
+
+// Group opcodes by function
+assign arith = op_add | op_sub;
+assign logical = op_and | op_andhi | op_nor | op_or | op_orhi | op_xor | op_xnor;
+assign cmp = op_cmpe | op_cmpg | op_cmpge | op_cmpgeu | op_cmpgu | op_cmpne;
+assign bi_conditional = op_be | op_bg | op_bge | op_bgeu | op_bgu | op_bne;
+assign bi_unconditional = op_bi;
+assign bra = op_b | bi_unconditional | bi_conditional;
+assign call = op_call | op_calli;
+`ifdef LM32_BARREL_SHIFT_ENABLED
+assign shift = op_sl | op_sr | op_sru;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+assign shift = op_sr | op_sru; // op_sl is not part of the shift group in this configuration
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+assign shift_left = op_sl;
+assign shift_right = op_sr | op_sru;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+assign sext = op_sextb | op_sexth;
+`endif
+`ifdef LM32_MULTIPLY_ENABLED
+assign multiply = op_mul;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign divide = op_divu;
+assign modulus = op_modu;
+`endif
+assign load = op_lb | op_lbu | op_lh | op_lhu | op_lw;
+assign store = op_sb | op_sh | op_sw;
+
+// Select pipeline multiplexor controls
+always @(*)
+begin
+    // D stage
+    if (call)
+        d_result_sel_0 = `LM32_D_RESULT_SEL_0_NEXT_PC;
+    else
+        d_result_sel_0 = `LM32_D_RESULT_SEL_0_REG_0;
+    if (call)
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_ZERO;
+    else if ((instruction[31] == 1'b0) && !bra) // bit 31 clear selects the immediate operand, except for branches
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_IMMEDIATE;
+    else
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_REG_1;
+    // X stage: clear every select, then the priority chain below sets exactly one (the adder is the fall-through)
+    x_result_sel_csr = `FALSE;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+    x_result_sel_mc_arith = `FALSE;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+    x_result_sel_shift = `FALSE;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    x_result_sel_sext = `FALSE;
+`endif
+    x_result_sel_logic = `FALSE;
+`ifdef CFG_USER_ENABLED
+    x_result_sel_user = `FALSE;
+`endif
+    x_result_sel_add = `FALSE;
+    if (op_rcsr)
+        x_result_sel_csr = `TRUE;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    else if (shift_left | shift_right)
+        x_result_sel_mc_arith = `TRUE;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+    else if (divide | modulus)
+        x_result_sel_mc_arith = `TRUE;
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    else if (multiply)
+        x_result_sel_mc_arith = `TRUE;
+`endif
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+    else if (shift)
+        x_result_sel_shift = `TRUE;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    else if (sext)
+        x_result_sel_sext = `TRUE;
+`endif
+    else if (logical)
+        x_result_sel_logic = `TRUE;
+`ifdef CFG_USER_ENABLED
+    else if (op_user)
+        x_result_sel_user = `TRUE;
+`endif
+    else
+        x_result_sel_add = `TRUE;
+
+    // M stage
+
+    m_result_sel_compare = cmp;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    m_result_sel_shift = shift;
+`endif
+
+    // W stage
+    w_result_sel_load = load;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+    w_result_sel_mul = op_mul;
+`endif
+end
+
+// Set if result is valid at end of X stage
+assign x_bypass_enable =  arith
+                        | logical
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+                        | shift_left
+                        | shift_right
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+                        | multiply
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+                        | divide
+                        | modulus
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+                        | shift
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+                        | sext
+`endif
+`ifdef CFG_USER_ENABLED
+                        | op_user
+`endif
+                        | op_rcsr
+                        ;
+// Set if result is valid at end of M stage
+assign m_bypass_enable = x_bypass_enable
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+                        | shift
+`endif
+                        | cmp
+                        ;
+// Register file read port 0
+assign read_enable_0 = ~(op_bi | op_calli);
+assign read_idx_0 = instruction[25:21];
+// Register file read port 1
+assign read_enable_1 = ~(op_bi | op_calli | load);
+assign read_idx_1 = instruction[20:16];
+// Register file write port
+assign write_enable = ~(bra | op_raise | store | op_wcsr);
+assign write_idx = call
+                 ? 5'd29 // calls always write register 29
+                 : instruction[31] == 1'b0
+                     ? instruction[20:16] // bit 31 clear: destination taken from bits 20:16
+                     : instruction[15:11]; // bit 31 set: destination taken from bits 15:11
+
+// Size of load/stores
+assign size = instruction[27:26];
+// Whether to sign or zero extend
+assign sign_extend = instruction[28];
+// Set adder_op to 1 to perform a subtraction
+assign adder_op = op_sub | op_cmpe | op_cmpg | op_cmpge | op_cmpgeu | op_cmpgu | op_cmpne | bra;
+// Logic operation (and, or, etc)
+assign logic_op = instruction[29:26];
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+// Shift direction
+assign direction = instruction[29];
+`endif
+// Control flow microcodes
+assign branch = bra | call;
+assign branch_reg = op_call | op_b; // the two branch forms that are not the immediate variants (bi, calli, conditional)
+assign condition = instruction[28:26];
+`ifdef CFG_DEBUG_ENABLED
+assign break_opcode = op_raise & ~instruction[2]; // raise with bit 2 clear
+`endif
+assign scall = op_raise & instruction[2]; // raise with bit 2 set
+assign eret = op_b & (instruction[25:21] == 5'd30); // a b instruction whose bits 25:21 name register 30
+`ifdef CFG_DEBUG_ENABLED
+assign bret = op_b & (instruction[25:21] == 5'd31); // a b instruction whose bits 25:21 name register 31
+`endif
+`ifdef CFG_USER_ENABLED
+// Extract user opcode
+assign user_opcode = instruction[10:0];
+`endif
+// CSR read/write
+assign csr_write_enable = op_wcsr;
+
+// Extract immediate from instruction
+
+assign sign_extend_immediate = ~(op_and | op_cmpgeu | op_cmpgu | op_nor | op_or | op_xnor | op_xor); // zero-extend only for these logic ops and unsigned compares
+assign select_high_immediate = op_andhi | op_orhi;
+assign select_call_immediate = instruction[31];
+
+assign high_immediate = {instruction[15:0], 16'h0000};
+assign extended_immediate = {{16{sign_extend_immediate & instruction[15]}}, instruction[15:0]};
+assign call_immediate = {{6{instruction[25]}}, instruction[25:0]}; // 26-bit field sign-extended to 32 bits
+assign branch_immediate = {{16{instruction[15]}}, instruction[15:0]}; // 16-bit field, always sign-extended
+
+assign immediate = select_high_immediate == `TRUE
+                        ? high_immediate
+                        : extended_immediate;
+
+assign branch_offset = select_call_immediate == `TRUE
+                        ? call_immediate
+                        : branch_immediate;
+
+endmodule
+
diff --git a/verilog/lm32/lm32_dp_ram.v b/verilog/lm32/lm32_dp_ram.v
new file mode 100644
index 00000000..1d7f4f17
--- /dev/null
+++ b/verilog/lm32/lm32_dp_ram.v
@@ -0,0 +1,35 @@
+module lm32_dp_ram( // Dual-port RAM: one synchronous write port, one read port with a registered address
+    clk_i,
+    rst_i, // declared but never used inside this module
+    we_i,
+    waddr_i,
+    wdata_i,
+    raddr_i,
+    rdata_o);
+
+parameter addr_width = 32;
+parameter addr_depth = 1024; // number of words in the array; independent of addr_width
+parameter data_width = 8;
+
+input clk_i;
+input rst_i;
+input we_i;
+input [addr_width-1:0] waddr_i;
+input [data_width-1:0] wdata_i;
+input [addr_width-1:0] raddr_i;
+output [data_width-1:0] rdata_o;
+
+reg [data_width-1:0] ram[addr_depth-1:0];
+
+reg [addr_width-1:0] raddr_r; // read address captured on the clock edge
+assign rdata_o = ram[raddr_r]; // data follows the registered address, so it also reflects a write made to that word on the same edge
+
+always @ (posedge clk_i)
+begin
+    if (we_i)
+        ram[waddr_i] <= wdata_i;
+    raddr_r <= raddr_i;
+end
+
+endmodule
+
diff --git a/verilog/lm32/lm32_functions.v b/verilog/lm32/lm32_functions.v
new file mode 100644
index 00000000..1332a6e5
--- /dev/null
+++ b/verilog/lm32/lm32_functions.v
@@ -0,0 +1,70 @@
+// ==================================================================
+// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+// ------------------------------------------------------------------
+// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+// ALL RIGHTS RESERVED
+// ------------------------------------------------------------------
+//
+// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+// Permission:
+//
+// Lattice Semiconductor grants permission to use this code
+// pursuant to the terms of the Lattice Semiconductor Corporation
+// Open Source License Agreement.
+//
+// Disclaimer:
+//
+// Lattice Semiconductor provides no warranty regarding the use or
+// functionality of this code. It is the user's responsibility to
+// verify the user's design for consistency and functionality through
+// the use of formal verification methods.
+//
+// --------------------------------------------------------------------
+//
+// Lattice Semiconductor Corporation
+// 5555 NE Moore Court
+// Hillsboro, OR 97214
+// U.S.A
+//
+// TEL: 1-800-Lattice (USA and Canada)
+// 503-286-8001 (other locations)
+//
+// web: http://www.latticesemi.com/
+// email: techsupport@latticesemi.com
+//
+// --------------------------------------------------------------------
+// FILE DETAILS
+// Project : LatticeMico32
+// File : lm32_functions.v
+// Title : Common functions
+// Version : 6.1.17
+// : Initial Release
+// Version : 7.0SP2, 3.0
+// : No Change
+// Version : 3.5
+// : Added function to generate log-of-two that rounds-up to
+// : power-of-two
+// =============================================================================
+
+function integer clogb2; // Number of bits needed to represent value: floor(log2(value))+1, or 0 when value is 0
+input [31:0] value;
+begin
+   for (clogb2 = 0; value > 0; clogb2 = clogb2 + 1) // count right-shifts until no set bit remains
+        value = value >> 1;
+end
+endfunction
+
+function integer clogb2_v1; // Ceiling of log2(value); value must be in 1..2**31 (0 yields i-1 with i == 0, larger values never satisfy the loop exit because 1<<32 wraps to 0)
+input [31:0] value;
+reg   [31:0] i;
+reg   [31:0] temp;
+begin
+   temp = 0;
+   i    = 0;
+   for (i = 0; temp < value; i = i + 1) // on exit temp == 1<<(i-1) is the first power of two >= value
+        temp = 1<<i;
+   clogb2_v1 = i-1;
+end
+endfunction
+
diff --git a/verilog/lm32/lm32_icache.v b/verilog/lm32/lm32_icache.v
new file mode 100644
--- /dev/null
+++ b/verilog/lm32/lm32_icache.v
@@ -0,0 +1,481 @@
+// ==================================================================
+// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+// ------------------------------------------------------------------
+// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+// ALL RIGHTS RESERVED
+// ------------------------------------------------------------------
+//
+// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+// Permission:
+//
+// Lattice Semiconductor grants permission to use this code
+// pursuant to the terms of the Lattice Semiconductor Corporation
+// Open Source License Agreement.
+// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_icache.v +// Title : Instruction cache +// Dependencies : lm32_include.v +// +// Version 3.5 +// 1. Bug Fix: Instruction cache flushes issued from Instruction Inline Memory +// cause segmentation fault due to incorrect fetches. +// +// Version 3.1 +// 1. Feature: Support for user-selected resource usage when implementing +// cache memory. Additional parameters must be defined when invoking module +// lm32_ram. Instruction cache miss mechanism is dependent on branch +// prediction being performed in D stage of pipeline. 
+// +// Version 7.0SP2, 3.0 +// No change +// ============================================================================= + +`include "lm32_include.v" + +`ifdef CFG_ICACHE_ENABLED + +`define LM32_IC_ADDR_OFFSET_RNG addr_offset_msb:addr_offset_lsb +`define LM32_IC_ADDR_SET_RNG addr_set_msb:addr_set_lsb +`define LM32_IC_ADDR_TAG_RNG addr_tag_msb:addr_tag_lsb +`define LM32_IC_ADDR_IDX_RNG addr_set_msb:addr_offset_lsb + +`define LM32_IC_TMEM_ADDR_WIDTH addr_set_width +`define LM32_IC_TMEM_ADDR_RNG (`LM32_IC_TMEM_ADDR_WIDTH-1):0 +`define LM32_IC_DMEM_ADDR_WIDTH (addr_offset_width+addr_set_width) +`define LM32_IC_DMEM_ADDR_RNG (`LM32_IC_DMEM_ADDR_WIDTH-1):0 + +`define LM32_IC_TAGS_WIDTH (addr_tag_width+1) +`define LM32_IC_TAGS_RNG (`LM32_IC_TAGS_WIDTH-1):0 +`define LM32_IC_TAGS_TAG_RNG (`LM32_IC_TAGS_WIDTH-1):1 +`define LM32_IC_TAGS_VALID_RNG 0 + +`define LM32_IC_STATE_RNG 3:0 +`define LM32_IC_STATE_FLUSH_INIT 4'b0001 +`define LM32_IC_STATE_FLUSH 4'b0010 +`define LM32_IC_STATE_CHECK 4'b0100 +`define LM32_IC_STATE_REFILL 4'b1000 + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_icache ( + // ----- Inputs ----- + clk_i, + rst_i, + stall_a, + stall_f, + address_a, + address_f, + read_enable_f, + refill_ready, + refill_data, + iflush, +`ifdef CFG_IROM_ENABLED + select_f, +`endif + valid_d, + branch_predict_taken_d, + // ----- Outputs ----- + stall_request, + restart_request, + refill_request, + refill_address, + refilling, + inst + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter associativity = 1; // Associativity of the cache (Number of ways) +parameter sets = 512; // Number of sets +parameter bytes_per_line = 16; // Number of bytes per cache line +parameter base_address = 0; // Base address of cachable memory +parameter limit = 0; // Limit (highest address) of cachable memory + 
+localparam addr_offset_width = clogb2(bytes_per_line)-1-2; +localparam addr_set_width = clogb2(sets)-1; +localparam addr_offset_lsb = 2; +localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1); +localparam addr_set_lsb = (addr_offset_msb+1); +localparam addr_set_msb = (addr_set_lsb+addr_set_width-1); +localparam addr_tag_lsb = (addr_set_msb+1); +localparam addr_tag_msb = clogb2(`CFG_ICACHE_LIMIT-`CFG_ICACHE_BASE_ADDRESS)-1; +localparam addr_tag_width = (addr_tag_msb-addr_tag_lsb+1); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input stall_a; // Stall instruction in A stage +input stall_f; // Stall instruction in F stage + +input valid_d; // Valid instruction in D stage +input branch_predict_taken_d; // Instruction in D stage is a branch and is predicted taken + +input [`LM32_PC_RNG] address_a; // Address of instruction in A stage +input [`LM32_PC_RNG] address_f; // Address of instruction in F stage +input read_enable_f; // Indicates if cache access is valid + +input refill_ready; // Next word of refill data is ready +input [`LM32_INSTRUCTION_RNG] refill_data; // Data to refill the cache with + +input iflush; // Flush the cache +`ifdef CFG_IROM_ENABLED +input select_f; // Instruction in F stage is mapped through instruction cache +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output stall_request; // Request to stall the pipeline +wire stall_request; +output restart_request; // Request to restart instruction that caused the cache miss +reg restart_request; +output refill_request; // Request to refill a cache line +wire refill_request; +output [`LM32_PC_RNG] refill_address; // Base address of cache refill +reg [`LM32_PC_RNG] refill_address; +output refilling; // Indicates the instruction cache is currently refilling +reg refilling; +output 
[`LM32_INSTRUCTION_RNG] inst; // Instruction read from cache +wire [`LM32_INSTRUCTION_RNG] inst; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +wire enable; +wire [0:associativity-1] way_mem_we; +wire [`LM32_INSTRUCTION_RNG] way_data[0:associativity-1]; +wire [`LM32_IC_TAGS_TAG_RNG] way_tag[0:associativity-1]; +wire [0:associativity-1] way_valid; +wire [0:associativity-1] way_match; +wire miss; + +wire [`LM32_IC_TMEM_ADDR_RNG] tmem_read_address; +wire [`LM32_IC_TMEM_ADDR_RNG] tmem_write_address; +wire [`LM32_IC_DMEM_ADDR_RNG] dmem_read_address; +wire [`LM32_IC_DMEM_ADDR_RNG] dmem_write_address; +wire [`LM32_IC_TAGS_RNG] tmem_write_data; + +reg [`LM32_IC_STATE_RNG] state; +wire flushing; +wire check; +wire refill; + +reg [associativity-1:0] refill_way_select; +reg [`LM32_IC_ADDR_OFFSET_RNG] refill_offset; +wire last_refill; +reg [`LM32_IC_TMEM_ADDR_RNG] flush_set; + +genvar i; + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + + generate + for (i = 0; i < associativity; i = i + 1) + begin : memories + + lm32_ram + #( + // ----- Parameters ------- + .data_width (32), + .address_width (`LM32_IC_DMEM_ADDR_WIDTH) +// Modified for Milkymist: removed non-portable RAM parameters +) + way_0_data_ram + ( + // ----- Inputs ------- + .read_clk (clk_i), + .write_clk (clk_i), + .reset (rst_i), + .read_address (dmem_read_address), + .enable_read (enable), + .write_address (dmem_write_address), + .enable_write (`TRUE), + .write_enable (way_mem_we[i]), + .write_data (refill_data), + // ----- Outputs ------- + .read_data (way_data[i]) + ); + + lm32_ram + #( + // ----- Parameters ------- + .data_width (`LM32_IC_TAGS_WIDTH), + .address_width 
(`LM32_IC_TMEM_ADDR_WIDTH) +// Modified for Milkymist: removed non-portable RAM parameters + ) + way_0_tag_ram + ( + // ----- Inputs ------- + .read_clk (clk_i), + .write_clk (clk_i), + .reset (rst_i), + .read_address (tmem_read_address), + .enable_read (enable), + .write_address (tmem_write_address), + .enable_write (`TRUE), + .write_enable (way_mem_we[i] | flushing), + .write_data (tmem_write_data), + // ----- Outputs ------- + .read_data ({way_tag[i], way_valid[i]}) + ); + + end +endgenerate + +///////////////////////////////////////////////////// +// Combinational logic +///////////////////////////////////////////////////// + +// Compute which ways in the cache match the address address being read +generate + for (i = 0; i < associativity; i = i + 1) + begin : match +assign way_match[i] = ({way_tag[i], way_valid[i]} == {address_f[`LM32_IC_ADDR_TAG_RNG], `TRUE}); + end +endgenerate + +// Select data from way that matched the address being read +generate + if (associativity == 1) + begin : inst_1 +assign inst = way_match[0] ? way_data[0] : 32'b0; + end + else if (associativity == 2) + begin : inst_2 +assign inst = way_match[0] ? way_data[0] : (way_match[1] ? way_data[1] : 32'b0); + end +endgenerate + +// Compute address to use to index into the data memories +generate + if (bytes_per_line > 4) +assign dmem_write_address = {refill_address[`LM32_IC_ADDR_SET_RNG], refill_offset}; + else +assign dmem_write_address = refill_address[`LM32_IC_ADDR_SET_RNG]; +endgenerate + +assign dmem_read_address = address_a[`LM32_IC_ADDR_IDX_RNG]; + +// Compute address to use to index into the tag memories +assign tmem_read_address = address_a[`LM32_IC_ADDR_SET_RNG]; +assign tmem_write_address = flushing + ? 
flush_set + : refill_address[`LM32_IC_ADDR_SET_RNG]; + +// Compute signal to indicate when we are on the last refill accesses +generate + if (bytes_per_line > 4) +assign last_refill = refill_offset == {addr_offset_width{1'b1}}; + else +assign last_refill = `TRUE; +endgenerate + +// Compute data and tag memory access enable +assign enable = (stall_a == `FALSE); + +// Compute data and tag memory write enables +generate + if (associativity == 1) + begin : we_1 +assign way_mem_we[0] = (refill_ready == `TRUE); + end + else + begin : we_2 +assign way_mem_we[0] = (refill_ready == `TRUE) && (refill_way_select[0] == `TRUE); +assign way_mem_we[1] = (refill_ready == `TRUE) && (refill_way_select[1] == `TRUE); + end +endgenerate + +// On the last refill cycle set the valid bit, for all other writes it should be cleared +assign tmem_write_data[`LM32_IC_TAGS_VALID_RNG] = last_refill & !flushing; +assign tmem_write_data[`LM32_IC_TAGS_TAG_RNG] = refill_address[`LM32_IC_ADDR_TAG_RNG]; + +// Signals that indicate which state we are in +assign flushing = |state[1:0]; +assign check = state[2]; +assign refill = state[3]; + +assign miss = (~(|way_match)) && (read_enable_f == `TRUE) && (stall_f == `FALSE) && !(valid_d && branch_predict_taken_d); +assign stall_request = (check == `FALSE); +assign refill_request = (refill == `TRUE); + +///////////////////////////////////////////////////// +// Sequential logic +///////////////////////////////////////////////////// + +// Record way selected for replacement on a cache miss +generate + if (associativity >= 2) + begin : way_select +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refill_way_select <= {{associativity-1{1'b0}}, 1'b1}; + else + begin + if (miss == `TRUE) + refill_way_select <= {refill_way_select[0], refill_way_select[1]}; + end +end + end +endgenerate + +// Record whether we are refilling +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refilling <= `FALSE; + else + 
refilling <= refill; +end + +// Instruction cache control FSM +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + state <= `LM32_IC_STATE_FLUSH_INIT; + flush_set <= {`LM32_IC_TMEM_ADDR_WIDTH{1'b1}}; + refill_address <= {`LM32_PC_WIDTH{1'bx}}; + restart_request <= `FALSE; + end + else + begin + case (state) + + // Flush the cache for the first time after reset + `LM32_IC_STATE_FLUSH_INIT: + begin + if (flush_set == {`LM32_IC_TMEM_ADDR_WIDTH{1'b0}}) + state <= `LM32_IC_STATE_CHECK; + flush_set <= flush_set - 1'b1; + end + + // Flush the cache in response to an write to the ICC CSR + `LM32_IC_STATE_FLUSH: + begin + if (flush_set == {`LM32_IC_TMEM_ADDR_WIDTH{1'b0}}) +`ifdef CFG_IROM_ENABLED + if (select_f) + state <= `LM32_IC_STATE_REFILL; + else +`endif + state <= `LM32_IC_STATE_CHECK; + + flush_set <= flush_set - 1'b1; + end + + // Check for cache misses + `LM32_IC_STATE_CHECK: + begin + if (stall_a == `FALSE) + restart_request <= `FALSE; + if (iflush == `TRUE) + begin + refill_address <= address_f; + state <= `LM32_IC_STATE_FLUSH; + end + else if (miss == `TRUE) + begin + refill_address <= address_f; + state <= `LM32_IC_STATE_REFILL; + end + end + + // Refill a cache line + `LM32_IC_STATE_REFILL: + begin + if (refill_ready == `TRUE) + begin + if (last_refill == `TRUE) + begin + restart_request <= `TRUE; + state <= `LM32_IC_STATE_CHECK; + end + end + end + + endcase + end +end + +generate + if (bytes_per_line > 4) + begin +// Refill offset +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + refill_offset <= {addr_offset_width{1'b0}}; + else + begin + case (state) + + // Check for cache misses + `LM32_IC_STATE_CHECK: + begin + if (iflush == `TRUE) + refill_offset <= {addr_offset_width{1'b0}}; + else if (miss == `TRUE) + refill_offset <= {addr_offset_width{1'b0}}; + end + + // Refill a cache line + `LM32_IC_STATE_REFILL: + begin + if (refill_ready == `TRUE) + refill_offset <= refill_offset + 1'b1; + end + + 
endcase + end +end + end +endgenerate + +endmodule + +`endif + diff --git a/verilog/lm32/lm32_include.v b/verilog/lm32/lm32_include.v new file mode 100644 index 00000000..512c68bb --- /dev/null +++ b/verilog/lm32/lm32_include.v @@ -0,0 +1,377 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_include.v +// Title : CPU global macros +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// Version : 3.2 +// : No Change +// Version : 3.3 +// : Support for extended configuration register +// ============================================================================= + +`ifdef LM32_INCLUDE_V +`else +`define LM32_INCLUDE_V + +// +// Common configuration options +// + +`define CFG_EBA_RESET 32'h00860000 +`define CFG_DEBA_RESET 32'h10000000 + +`define CFG_PL_MULTIPLY_ENABLED +`define CFG_PL_BARREL_SHIFT_ENABLED +`define CFG_SIGN_EXTEND_ENABLED +`define CFG_MC_DIVIDE_ENABLED +`define CFG_EBR_POSEDGE_REGISTER_FILE + +`define CFG_ICACHE_ENABLED +`define CFG_ICACHE_ASSOCIATIVITY 1 +`define CFG_ICACHE_SETS 256 +`define CFG_ICACHE_BYTES_PER_LINE 16 +`define CFG_ICACHE_BASE_ADDRESS 32'h0 +`define CFG_ICACHE_LIMIT 32'h7fffffff + +`define CFG_DCACHE_ENABLED +`define CFG_DCACHE_ASSOCIATIVITY 1 +`define CFG_DCACHE_SETS 256 +`define CFG_DCACHE_BYTES_PER_LINE 16 +`define CFG_DCACHE_BASE_ADDRESS 32'h0 +`define CFG_DCACHE_LIMIT 32'h7fffffff + +// Enable Debugging +//`define CFG_JTAG_ENABLED +//`define CFG_JTAG_UART_ENABLED +//`define CFG_DEBUG_ENABLED +//`define CFG_HW_DEBUG_ENABLED +//`define CFG_ROM_DEBUG_ENABLED +//`define CFG_BREAKPOINTS 32'h4 +//`define CFG_WATCHPOINTS 32'h4 +//`define CFG_EXTERNAL_BREAK_ENABLED +//`define CFG_GDBSTUB_ENABLED + +// +// End of common configuration options +// + +`ifdef TRUE +`else +`define TRUE 1'b1 
+`define FALSE 1'b0 +`define TRUE_N 1'b0 +`define FALSE_N 1'b1 +`endif + +// Wishbone configuration +`define CFG_IWB_ENABLED +`define CFG_DWB_ENABLED + +// Data-path width +`define LM32_WORD_WIDTH 32 +`define LM32_WORD_RNG (`LM32_WORD_WIDTH-1):0 +`define LM32_SHIFT_WIDTH 5 +`define LM32_SHIFT_RNG (`LM32_SHIFT_WIDTH-1):0 +`define LM32_BYTE_SELECT_WIDTH 4 +`define LM32_BYTE_SELECT_RNG (`LM32_BYTE_SELECT_WIDTH-1):0 + +// Register file size +`define LM32_REGISTERS 32 +`define LM32_REG_IDX_WIDTH 5 +`define LM32_REG_IDX_RNG (`LM32_REG_IDX_WIDTH-1):0 + +// Standard register numbers +`define LM32_RA_REG `LM32_REG_IDX_WIDTH'd29 +`define LM32_EA_REG `LM32_REG_IDX_WIDTH'd30 +`define LM32_BA_REG `LM32_REG_IDX_WIDTH'd31 + +// Range of Program Counter. Two LSBs are always 0. +`define LM32_PC_WIDTH (`LM32_WORD_WIDTH-2) +`define LM32_PC_RNG (`LM32_PC_WIDTH+2-1):2 + +// Range of an instruction +`define LM32_INSTRUCTION_WIDTH 32 +`define LM32_INSTRUCTION_RNG (`LM32_INSTRUCTION_WIDTH-1):0 + +// Adder operation +`define LM32_ADDER_OP_ADD 1'b0 +`define LM32_ADDER_OP_SUBTRACT 1'b1 + +// Shift direction +`define LM32_SHIFT_OP_RIGHT 1'b0 +`define LM32_SHIFT_OP_LEFT 1'b1 + +// Bus errors +`define CFG_BUS_ERRORS_ENABLED + +// Derive macro that indicates whether we have single-stepping or not +`ifdef CFG_ROM_DEBUG_ENABLED +`define LM32_SINGLE_STEP_ENABLED +`else +`ifdef CFG_HW_DEBUG_ENABLED +`define LM32_SINGLE_STEP_ENABLED +`endif +`endif + +// Derive macro that indicates whether JTAG interface is required +`ifdef CFG_JTAG_UART_ENABLED +`define LM32_JTAG_ENABLED +`else +`ifdef CFG_DEBUG_ENABLED +`define LM32_JTAG_ENABLED +`else +`endif +`endif + +// Derive macro that indicates whether ROM debug is required. NOTE(review): this runs after the LM32_SINGLE_STEP_ENABLED derivation above, so defining only CFG_GDBSTUB_ENABLED sets CFG_ROM_DEBUG_ENABLED without LM32_SINGLE_STEP_ENABLED - confirm this ordering is intended +`ifdef CFG_GDBSTUB_ENABLED +`define CFG_ROM_DEBUG_ENABLED +`endif + +// Derive macro that indicates whether we have a barrel-shifter or not +`ifdef CFG_PL_BARREL_SHIFT_ENABLED +`define LM32_BARREL_SHIFT_ENABLED +`else // CFG_PL_BARREL_SHIFT_ENABLED +`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+`define LM32_BARREL_SHIFT_ENABLED +`else +`define LM32_NO_BARREL_SHIFT +`endif +`endif // CFG_PL_BARREL_SHIFT_ENABLED + +// Derive macro that indicates whether we have a multiplier or not +`ifdef CFG_PL_MULTIPLY_ENABLED +`define LM32_MULTIPLY_ENABLED +`else +`ifdef CFG_MC_MULTIPLY_ENABLED +`define LM32_MULTIPLY_ENABLED +`endif +`endif + +// Derive a macro that indicates whether or not the multi-cycle arithmetic unit is required +`ifdef CFG_MC_DIVIDE_ENABLED +`define LM32_MC_ARITHMETIC_ENABLED +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED +`define LM32_MC_ARITHMETIC_ENABLED +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +`define LM32_MC_ARITHMETIC_ENABLED +`endif + +// Derive macro that indicates if we are using an EBR register file +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +`define LM32_EBR_REGISTER_FILE +`endif +`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE +`define LM32_EBR_REGISTER_FILE +`endif + +// Revision number +`define LM32_REVISION 6'h02 + +// Logical operations - Function encoded directly in instruction +`define LM32_LOGIC_OP_RNG 3:0 + +// Conditions for conditional branches +`define LM32_CONDITION_WIDTH 3 +`define LM32_CONDITION_RNG (`LM32_CONDITION_WIDTH-1):0 +`define LM32_CONDITION_E 3'b001 +`define LM32_CONDITION_G 3'b010 +`define LM32_CONDITION_GE 3'b011 +`define LM32_CONDITION_GEU 3'b100 +`define LM32_CONDITION_GU 3'b101 +`define LM32_CONDITION_NE 3'b111 +`define LM32_CONDITION_U1 3'b000 +`define LM32_CONDITION_U2 3'b110 + +// Size of load or store instruction - Encoding corresponds to opcode +`define LM32_SIZE_WIDTH 2 +`define LM32_SIZE_RNG 1:0 +`define LM32_SIZE_BYTE 2'b00 +`define LM32_SIZE_HWORD 2'b11 +`define LM32_SIZE_WORD 2'b10 +`define LM32_ADDRESS_LSBS_WIDTH 2 + +// Width and range of a CSR index: 5 bits with CFG_DEBUG_ENABLED, else 4 bits with CFG_JTAG_ENABLED, otherwise 3 bits +`ifdef CFG_DEBUG_ENABLED +`define LM32_CSR_WIDTH 5 +`define LM32_CSR_RNG (`LM32_CSR_WIDTH-1):0 +`else +`ifdef CFG_JTAG_ENABLED +`define LM32_CSR_WIDTH 4 +`define LM32_CSR_RNG (`LM32_CSR_WIDTH-1):0 +`else +`define LM32_CSR_WIDTH 3 +`define LM32_CSR_RNG
(`LM32_CSR_WIDTH-1):0 +`endif +`endif + +// CSR indices. NOTE(review): LM32_CSR_CFG2 is defined unconditionally as 'ha, which needs 4 bits; with the 3-bit LM32_CSR_WIDTH fallback the sized literal 3'ha truncates to 3'h2, the same value as LM32_CSR_IP - confirm how CFG2 is expected to be addressed in that configuration +`define LM32_CSR_IE `LM32_CSR_WIDTH'h0 +`define LM32_CSR_IM `LM32_CSR_WIDTH'h1 +`define LM32_CSR_IP `LM32_CSR_WIDTH'h2 +`define LM32_CSR_ICC `LM32_CSR_WIDTH'h3 +`define LM32_CSR_DCC `LM32_CSR_WIDTH'h4 +`define LM32_CSR_CC `LM32_CSR_WIDTH'h5 +`define LM32_CSR_CFG `LM32_CSR_WIDTH'h6 +`define LM32_CSR_EBA `LM32_CSR_WIDTH'h7 +`ifdef CFG_DEBUG_ENABLED +`define LM32_CSR_DC `LM32_CSR_WIDTH'h8 +`define LM32_CSR_DEBA `LM32_CSR_WIDTH'h9 +`endif +`define LM32_CSR_CFG2 `LM32_CSR_WIDTH'ha +`ifdef CFG_JTAG_ENABLED +`define LM32_CSR_JTX `LM32_CSR_WIDTH'he +`define LM32_CSR_JRX `LM32_CSR_WIDTH'hf +`endif +`ifdef CFG_DEBUG_ENABLED +`define LM32_CSR_BP0 `LM32_CSR_WIDTH'h10 +`define LM32_CSR_BP1 `LM32_CSR_WIDTH'h11 +`define LM32_CSR_BP2 `LM32_CSR_WIDTH'h12 +`define LM32_CSR_BP3 `LM32_CSR_WIDTH'h13 +`define LM32_CSR_WP0 `LM32_CSR_WIDTH'h18 +`define LM32_CSR_WP1 `LM32_CSR_WIDTH'h19 +`define LM32_CSR_WP2 `LM32_CSR_WIDTH'h1a +`define LM32_CSR_WP3 `LM32_CSR_WIDTH'h1b +`endif + +// Values for WPC CSR +`define LM32_WPC_C_RNG 1:0 +`define LM32_WPC_C_DISABLED 2'b00 +`define LM32_WPC_C_READ 2'b01 +`define LM32_WPC_C_WRITE 2'b10 +`define LM32_WPC_C_READ_WRITE 2'b11 + +// Exception IDs +`define LM32_EID_WIDTH 3 +`define LM32_EID_RNG (`LM32_EID_WIDTH-1):0 +`define LM32_EID_RESET 3'h0 +`define LM32_EID_BREAKPOINT 3'd1 +`define LM32_EID_INST_BUS_ERROR 3'h2 +`define LM32_EID_WATCHPOINT 3'd3 +`define LM32_EID_DATA_BUS_ERROR 3'h4 +`define LM32_EID_DIVIDE_BY_ZERO 3'h5 +`define LM32_EID_INTERRUPT 3'h6 +`define LM32_EID_SCALL 3'h7 + +// Pipeline result selection mux controls + +`define LM32_D_RESULT_SEL_0_RNG 0:0 +`define LM32_D_RESULT_SEL_0_REG_0 1'b0 +`define LM32_D_RESULT_SEL_0_NEXT_PC 1'b1 + +`define LM32_D_RESULT_SEL_1_RNG 1:0 +`define LM32_D_RESULT_SEL_1_ZERO 2'b00 +`define LM32_D_RESULT_SEL_1_REG_1 2'b01 +`define LM32_D_RESULT_SEL_1_IMMEDIATE 2'b10 + +`define LM32_USER_OPCODE_WIDTH 11 +`define LM32_USER_OPCODE_RNG
(`LM32_USER_OPCODE_WIDTH-1):0 + +// Derive a macro to indicate if either of the caches is implemented +`ifdef CFG_ICACHE_ENABLED +`define LM32_CACHE_ENABLED +`else +`ifdef CFG_DCACHE_ENABLED +`define LM32_CACHE_ENABLED +`endif +`endif + +///////////////////////////////////////////////////// +// Interrupts +///////////////////////////////////////////////////// + +// Always enable interrupts +`define CFG_INTERRUPTS_ENABLED + +// Currently this is fixed to 32 and should not be changed +`define CFG_INTERRUPTS 32 +`define LM32_INTERRUPT_WIDTH `CFG_INTERRUPTS +`define LM32_INTERRUPT_RNG (`LM32_INTERRUPT_WIDTH-1):0 + +///////////////////////////////////////////////////// +// General +///////////////////////////////////////////////////// + +// Sub-word range types +`define LM32_BYTE_WIDTH 8 +`define LM32_BYTE_RNG 7:0 +`define LM32_HWORD_WIDTH 16 +`define LM32_HWORD_RNG 15:0 + +// Word sub-byte indices +`define LM32_BYTE_0_RNG 7:0 +`define LM32_BYTE_1_RNG 15:8 +`define LM32_BYTE_2_RNG 23:16 +`define LM32_BYTE_3_RNG 31:24 + +// Word sub-halfword indices +`define LM32_HWORD_0_RNG 15:0 +`define LM32_HWORD_1_RNG 31:16 + +// Use a synchronous reset +`define CFG_RESET_SENSITIVITY + +// Wishbone defines +// Refer to Wishbone System-on-Chip Interconnection Architecture +// These should probably be moved to a Wishbone common file + +// Wishbone cycle types +`define LM32_CTYPE_WIDTH 3 +`define LM32_CTYPE_RNG (`LM32_CTYPE_WIDTH-1):0 +`define LM32_CTYPE_CLASSIC 3'b000 +`define LM32_CTYPE_CONSTANT 3'b001 +`define LM32_CTYPE_INCREMENTING 3'b010 +`define LM32_CTYPE_END 3'b111 + +// Wishbone burst types +`define LM32_BTYPE_WIDTH 2 +`define LM32_BTYPE_RNG (`LM32_BTYPE_WIDTH-1):0 +`define LM32_BTYPE_LINEAR 2'b00 +`define LM32_BTYPE_4_BEAT 2'b01 +`define LM32_BTYPE_8_BEAT 2'b10 +`define LM32_BTYPE_16_BEAT 2'b11 + +`endif diff --git a/verilog/lm32/lm32_instruction_unit.v b/verilog/lm32/lm32_instruction_unit.v new file mode 100644 index 00000000..10a2d9c9 --- /dev/null +++
b/verilog/lm32/lm32_instruction_unit.v @@ -0,0 +1,889 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_instruction_unit.v +// Title : Instruction unit +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : Support for static branch prediction is added. Fetching of +// : instructions can also be altered by branches predicted in D +// : stage of pipeline, and mispredicted branches in the X and M +// : stages of the pipeline. +// Version : 3.2 +// : EBRs use SYNC resets instead of ASYNC resets. 
+// Version : 3.3 +// : Support for a non-cacheable Instruction Memory that has a +// : single-cycle access latency. This memory can be accessed by +// : data port of LM32 (so that debugger has access to it). +// Version : 3.4 +// : No change +// Version : 3.5 +// : Bug fix: Inline memory is correctly generated if it is not a +// : power-of-two. +// : Bug fix: Fixed a bug that caused LM32 (configured without +// : instruction cache) to lock up in to an infinite loop due to a +// : instruction bus error when EBA was set to instruction inline +// : memory. +// Version : 3.8 +// : Feature: Support for dynamically switching EBA to DEBA via a +// : GPIO. +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_instruction_unit ( + // ----- Inputs ------- + clk_i, + rst_i, +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + at_debug, + `endif +`endif + // From pipeline + stall_a, + stall_f, + stall_d, + stall_x, + stall_m, + valid_f, + valid_d, + kill_f, + branch_predict_taken_d, + branch_predict_address_d, +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + branch_taken_x, + branch_target_x, +`endif + exception_m, + branch_taken_m, + branch_mispredict_taken_m, + branch_target_m, +`ifdef CFG_ICACHE_ENABLED + iflush, +`endif +`ifdef CFG_DCACHE_ENABLED + dcache_restart_request, + dcache_refill_request, + dcache_refilling, +`endif +`ifdef CFG_IROM_ENABLED + irom_store_data_m, + irom_address_xm, + irom_we_xm, +`endif +`ifdef CFG_IWB_ENABLED + // From Wishbone + i_dat_i, + i_ack_i, + i_err_i, +`endif +`ifdef CFG_HW_DEBUG_ENABLED + jtag_read_enable, + jtag_write_enable, + jtag_write_data, + jtag_address, +`endif + // ----- Outputs ------- + // To pipeline + pc_f, + pc_d, + pc_x, + pc_m, + pc_w, +`ifdef CFG_ICACHE_ENABLED + icache_stall_request, + icache_restart_request, + 
icache_refill_request, + icache_refilling, +`endif +`ifdef CFG_IROM_ENABLED + irom_data_m, +`endif +`ifdef CFG_IWB_ENABLED + // To Wishbone + i_dat_o, + i_adr_o, + i_cyc_o, + i_sel_o, + i_stb_o, + i_we_o, + i_cti_o, + i_lock_o, + i_bte_o, +`endif +`ifdef CFG_HW_DEBUG_ENABLED + jtag_read_data, + jtag_access_complete, +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_d, +`endif +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE + instruction_f, +`endif + instruction_d + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter associativity = 1; // Associativity of the cache (Number of ways) +parameter sets = 512; // Number of sets +parameter bytes_per_line = 16; // Number of bytes per cache line +parameter base_address = 0; // Base address of cachable memory +parameter limit = 0; // Limit (highest address) of cachable memory + +// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used +localparam addr_offset_width = bytes_per_line == 4 ? 
1 : clogb2(bytes_per_line)-1-2; +localparam addr_offset_lsb = 2; +localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + input at_debug; // GPIO input that maps EBA to DEBA + `endif +`endif + +input stall_a; // Stall A stage instruction +input stall_f; // Stall F stage instruction +input stall_d; // Stall D stage instruction +input stall_x; // Stall X stage instruction +input stall_m; // Stall M stage instruction +input valid_f; // Instruction in F stage is valid +input valid_d; // Instruction in D stage is valid +input kill_f; // Kill instruction in F stage + +input branch_predict_taken_d; // Branch is predicted taken in D stage +input [`LM32_PC_RNG] branch_predict_address_d; // Branch target address + +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH +input branch_taken_x; // Branch instruction in X stage is taken +input [`LM32_PC_RNG] branch_target_x; // Target PC of X stage branch instruction +`endif +input exception_m; +input branch_taken_m; // Branch instruction in M stage is taken +input branch_mispredict_taken_m; // Branch instruction in M stage is mispredicted as taken +input [`LM32_PC_RNG] branch_target_m; // Target PC of M stage branch instruction + +`ifdef CFG_ICACHE_ENABLED +input iflush; // Flush instruction cache +`endif +`ifdef CFG_DCACHE_ENABLED +input dcache_restart_request; // Restart instruction that caused a data cache miss +input dcache_refill_request; // Request to refill data cache +input dcache_refilling; +`endif + +`ifdef CFG_IROM_ENABLED +input [`LM32_WORD_RNG] irom_store_data_m; // Data from load-store unit +input [`LM32_WORD_RNG] irom_address_xm; // Address from load-store unit +input irom_we_xm; // Indicates if memory operation is load or store +`endif + +`ifdef CFG_IWB_ENABLED +input [`LM32_WORD_RNG] i_dat_i; 
// Instruction Wishbone interface read data +input i_ack_i; // Instruction Wishbone interface acknowledgement +input i_err_i; // Instruction Wishbone interface error +`endif + +`ifdef CFG_HW_DEBUG_ENABLED +input jtag_read_enable; // JTAG read memory request +input jtag_write_enable; // JTAG write memory request +input [`LM32_BYTE_RNG] jtag_write_data; // JTAG wrirte data +input [`LM32_WORD_RNG] jtag_address; // JTAG read/write address +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [`LM32_PC_RNG] pc_f; // F stage PC +reg [`LM32_PC_RNG] pc_f; +output [`LM32_PC_RNG] pc_d; // D stage PC +reg [`LM32_PC_RNG] pc_d; +output [`LM32_PC_RNG] pc_x; // X stage PC +reg [`LM32_PC_RNG] pc_x; +output [`LM32_PC_RNG] pc_m; // M stage PC +reg [`LM32_PC_RNG] pc_m; +output [`LM32_PC_RNG] pc_w; // W stage PC +reg [`LM32_PC_RNG] pc_w; + +`ifdef CFG_ICACHE_ENABLED +output icache_stall_request; // Instruction cache stall request +wire icache_stall_request; +output icache_restart_request; // Request to restart instruction that cached instruction cache miss +wire icache_restart_request; +output icache_refill_request; // Instruction cache refill request +wire icache_refill_request; +output icache_refilling; // Indicates the icache is refilling +wire icache_refilling; +`endif + +`ifdef CFG_IROM_ENABLED +output [`LM32_WORD_RNG] irom_data_m; // Data to load-store unit on load +wire [`LM32_WORD_RNG] irom_data_m; +`endif + +`ifdef CFG_IWB_ENABLED +output [`LM32_WORD_RNG] i_dat_o; // Instruction Wishbone interface write data +`ifdef CFG_HW_DEBUG_ENABLED +reg [`LM32_WORD_RNG] i_dat_o; +`else +wire [`LM32_WORD_RNG] i_dat_o; +`endif +output [`LM32_WORD_RNG] i_adr_o; // Instruction Wishbone interface address +reg [`LM32_WORD_RNG] i_adr_o; +output i_cyc_o; // Instruction Wishbone interface cycle +reg i_cyc_o; +output [`LM32_BYTE_SELECT_RNG] i_sel_o; // Instruction Wishbone interface byte select +`ifdef 
CFG_HW_DEBUG_ENABLED +reg [`LM32_BYTE_SELECT_RNG] i_sel_o; +`else +wire [`LM32_BYTE_SELECT_RNG] i_sel_o; +`endif +output i_stb_o; // Instruction Wishbone interface strobe +reg i_stb_o; +output i_we_o; // Instruction Wishbone interface write enable +`ifdef CFG_HW_DEBUG_ENABLED +reg i_we_o; +`else +wire i_we_o; +`endif +output [`LM32_CTYPE_RNG] i_cti_o; // Instruction Wishbone interface cycle type +reg [`LM32_CTYPE_RNG] i_cti_o; +output i_lock_o; // Instruction Wishbone interface lock bus +reg i_lock_o; +output [`LM32_BTYPE_RNG] i_bte_o; // Instruction Wishbone interface burst type +wire [`LM32_BTYPE_RNG] i_bte_o; +`endif + +`ifdef CFG_HW_DEBUG_ENABLED +output [`LM32_BYTE_RNG] jtag_read_data; // Data read for JTAG interface +reg [`LM32_BYTE_RNG] jtag_read_data; +output jtag_access_complete; // Requested memory access by JTAG interface is complete +wire jtag_access_complete; +`endif + +`ifdef CFG_BUS_ERRORS_ENABLED +output bus_error_d; // Indicates a bus error occured while fetching the instruction +reg bus_error_d; +`endif +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +output [`LM32_INSTRUCTION_RNG] instruction_f; // F stage instruction (only to have register indices extracted from) +wire [`LM32_INSTRUCTION_RNG] instruction_f; +`endif +output [`LM32_INSTRUCTION_RNG] instruction_d; // D stage instruction to be decoded +reg [`LM32_INSTRUCTION_RNG] instruction_d; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +reg [`LM32_PC_RNG] pc_a; // A stage PC + +`ifdef LM32_CACHE_ENABLED +reg [`LM32_PC_RNG] restart_address; // Address to restart from after a cache miss +`endif + +`ifdef CFG_ICACHE_ENABLED +wire icache_read_enable_f; // Indicates if instruction cache miss is valid +wire [`LM32_PC_RNG] icache_refill_address; // Address that caused cache miss +reg icache_refill_ready; // Indicates when next word of refill data is ready to be written to cache +reg [`LM32_INSTRUCTION_RNG] 
icache_refill_data; // Next word of refill data, fetched from Wishbone +wire [`LM32_INSTRUCTION_RNG] icache_data_f; // Instruction fetched from instruction cache +wire [`LM32_CTYPE_RNG] first_cycle_type; // First Wishbone cycle type +wire [`LM32_CTYPE_RNG] next_cycle_type; // Next Wishbone cycle type +wire last_word; // Indicates if this is the last word in the cache line +wire [`LM32_PC_RNG] first_address; // First cache refill address +`else +`ifdef CFG_IWB_ENABLED +reg [`LM32_INSTRUCTION_RNG] wb_data_f; // Instruction fetched from Wishbone +`endif +`endif +`ifdef CFG_IROM_ENABLED +wire irom_select_a; // Indicates if A stage PC maps to a ROM address +reg irom_select_f; // Indicates if F stage PC maps to a ROM address +wire [`LM32_INSTRUCTION_RNG] irom_data_f; // Instruction fetched from ROM +`endif +`ifdef CFG_EBR_POSEDGE_REGISTER_FILE +`else +wire [`LM32_INSTRUCTION_RNG] instruction_f; // F stage instruction +`endif +`ifdef CFG_BUS_ERRORS_ENABLED +reg bus_error_f; // Indicates if a bus error occured while fetching the instruction in the F stage +`endif + +`ifdef CFG_HW_DEBUG_ENABLED +reg jtag_access; // Indicates if a JTAG WB access is in progress +`endif + +`ifdef CFG_ALTERNATE_EBA +reg alternate_eba_taken; +`endif + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" + +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + +// Instruction ROM +`ifdef CFG_IROM_ENABLED + pmi_ram_dp_true + #( + // ----- Parameters ------- + .pmi_family (`LATTICE_FAMILY), + + //.pmi_addr_depth_a (1 << (clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)), + //.pmi_addr_width_a ((clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)), + //.pmi_data_width_a (`LM32_WORD_WIDTH), + //.pmi_addr_depth_b (1 << (clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)), + //.pmi_addr_width_b 
((clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)), + //.pmi_data_width_b (`LM32_WORD_WIDTH), + + .pmi_addr_depth_a (`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1), + .pmi_addr_width_a (clogb2_v1(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)), + .pmi_data_width_a (`LM32_WORD_WIDTH), + .pmi_addr_depth_b (`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1), + .pmi_addr_width_b (clogb2_v1(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)), + .pmi_data_width_b (`LM32_WORD_WIDTH), + + .pmi_regmode_a ("noreg"), + .pmi_regmode_b ("noreg"), + .pmi_gsr ("enable"), + .pmi_resetmode ("sync"), + .pmi_init_file (`CFG_IROM_INIT_FILE), + .pmi_init_file_format (`CFG_IROM_INIT_FILE_FORMAT), + .module_type ("pmi_ram_dp_true") + ) + ram ( + // ----- Inputs ------- + .ClockA (clk_i), + .ClockB (clk_i), + .ResetA (rst_i), + .ResetB (rst_i), + .DataInA ({32{1'b0}}), + .DataInB (irom_store_data_m), + .AddressA (pc_a[(clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)+2-1:2]), + .AddressB (irom_address_xm[(clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)+2-1:2]), + .ClockEnA (!stall_a), + .ClockEnB (!stall_x || !stall_m), + .WrA (`FALSE), + .WrB (irom_we_xm), + // ----- Outputs ------- + .QA (irom_data_f), + .QB (irom_data_m) + ); +`endif + +`ifdef CFG_ICACHE_ENABLED +// Instruction cache +lm32_icache #( + .associativity (associativity), + .sets (sets), + .bytes_per_line (bytes_per_line), + .base_address (base_address), + .limit (limit) + ) icache ( + // ----- Inputs ----- + .clk_i (clk_i), + .rst_i (rst_i), + .stall_a (stall_a), + .stall_f (stall_f), + .branch_predict_taken_d (branch_predict_taken_d), + .valid_d (valid_d), + .address_a (pc_a), + .address_f (pc_f), + .read_enable_f (icache_read_enable_f), + .refill_ready (icache_refill_ready), + .refill_data (icache_refill_data), + .iflush (iflush), + // ----- Outputs ----- + .stall_request (icache_stall_request), + .restart_request (icache_restart_request), + .refill_request (icache_refill_request), + .refill_address 
(icache_refill_address), + .refilling (icache_refilling), + .inst (icache_data_f) + ); +`endif + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +`ifdef CFG_ICACHE_ENABLED +// Generate signal that indicates when instruction cache misses are valid +assign icache_read_enable_f = (valid_f == `TRUE) + && (kill_f == `FALSE) +`ifdef CFG_DCACHE_ENABLED + && (dcache_restart_request == `FALSE) +`endif +`ifdef CFG_IROM_ENABLED + && (irom_select_f == `FALSE) +`endif + ; +`endif + +// Compute address of next instruction to fetch +always @(*) +begin + // The request from the latest pipeline stage must take priority +`ifdef CFG_DCACHE_ENABLED + if (dcache_restart_request == `TRUE) + pc_a = restart_address; + else +`endif + if (branch_taken_m == `TRUE) + if ((branch_mispredict_taken_m == `TRUE) && (exception_m == `FALSE)) + pc_a = pc_x; + else + pc_a = branch_target_m; +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + else if (branch_taken_x == `TRUE) + pc_a = branch_target_x; +`endif + else + if ( (valid_d == `TRUE) && (branch_predict_taken_d == `TRUE) ) + pc_a = branch_predict_address_d; + else +`ifdef CFG_ICACHE_ENABLED + if (icache_restart_request == `TRUE) + pc_a = restart_address; + else +`endif + pc_a = pc_f + 1'b1; +end + +// Select where instruction should be fetched from +`ifdef CFG_IROM_ENABLED +assign irom_select_a = ({pc_a, 2'b00} >= `CFG_IROM_BASE_ADDRESS) && ({pc_a, 2'b00} <= `CFG_IROM_LIMIT); +`endif + +// Select instruction from selected source +`ifdef CFG_ICACHE_ENABLED +`ifdef CFG_IROM_ENABLED +assign instruction_f = irom_select_f == `TRUE ? irom_data_f : icache_data_f; +`else +assign instruction_f = icache_data_f; +`endif +`else +`ifdef CFG_IROM_ENABLED +`ifdef CFG_IWB_ENABLED +assign instruction_f = irom_select_f == `TRUE ? 
irom_data_f : wb_data_f; +`else +assign instruction_f = irom_data_f; +`endif +`else +assign instruction_f = wb_data_f; +`endif +`endif + +// Unused/constant Wishbone signals +`ifdef CFG_IWB_ENABLED +`ifdef CFG_HW_DEBUG_ENABLED +`else +assign i_dat_o = 32'd0; +assign i_we_o = `FALSE; +assign i_sel_o = 4'b1111; +`endif +assign i_bte_o = `LM32_BTYPE_LINEAR; +`endif + +`ifdef CFG_ICACHE_ENABLED +// Determine parameters for next cache refill Wishbone access +generate + case (bytes_per_line) + 4: + begin +assign first_cycle_type = `LM32_CTYPE_END; +assign next_cycle_type = `LM32_CTYPE_END; +assign last_word = `TRUE; +assign first_address = icache_refill_address; + end + 8: + begin +assign first_cycle_type = `LM32_CTYPE_INCREMENTING; +assign next_cycle_type = `LM32_CTYPE_END; +assign last_word = i_adr_o[addr_offset_msb:addr_offset_lsb] == 1'b1; +assign first_address = {icache_refill_address[`LM32_PC_WIDTH+2-1:addr_offset_msb+1], {addr_offset_width{1'b0}}}; + end + 16: + begin +assign first_cycle_type = `LM32_CTYPE_INCREMENTING; +assign next_cycle_type = i_adr_o[addr_offset_msb] == 1'b1 ? 
`LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING; +assign last_word = i_adr_o[addr_offset_msb:addr_offset_lsb] == 2'b11; +assign first_address = {icache_refill_address[`LM32_PC_WIDTH+2-1:addr_offset_msb+1], {addr_offset_width{1'b0}}}; + end + endcase +endgenerate +`endif + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// PC +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + if (at_debug == `TRUE) + pc_f <= (`CFG_DEBA_RESET-4)/4; + else + pc_f <= (`CFG_EBA_RESET-4)/4; + `else + pc_f <= (`CFG_EBA_RESET-4)/4; + `endif +`else + pc_f <= (`CFG_EBA_RESET-4)/4; +`endif + pc_d <= {`LM32_PC_WIDTH{1'b0}}; + pc_x <= {`LM32_PC_WIDTH{1'b0}}; + pc_m <= {`LM32_PC_WIDTH{1'b0}}; + pc_w <= {`LM32_PC_WIDTH{1'b0}}; + end + else + begin + if (stall_f == `FALSE) + pc_f <= pc_a; + if (stall_d == `FALSE) + pc_d <= pc_f; + if (stall_x == `FALSE) + pc_x <= pc_d; + if (stall_m == `FALSE) + pc_m <= pc_x; + pc_w <= pc_m; + end +end + +`ifdef LM32_CACHE_ENABLED +// Address to restart from after a cache miss has been handled +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + restart_address <= {`LM32_PC_WIDTH{1'b0}}; + else + begin +`ifdef CFG_DCACHE_ENABLED +`ifdef CFG_ICACHE_ENABLED + // D-cache restart address must take priority, otherwise instructions will be lost + if (dcache_refill_request == `TRUE) + restart_address <= pc_w; + else if ((icache_refill_request == `TRUE) && (!dcache_refilling) && (!dcache_restart_request)) + restart_address <= icache_refill_address; +`else + if (dcache_refill_request == `TRUE) + restart_address <= pc_w; +`endif +`else +`ifdef CFG_ICACHE_ENABLED + if (icache_refill_request == `TRUE) + restart_address <= icache_refill_address; +`endif +`endif + end +end +`endif + +// Record where instruction was fetched from +`ifdef CFG_IROM_ENABLED +always @(posedge clk_i 
`CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + irom_select_f <= `FALSE; + else + begin + if (stall_f == `FALSE) + irom_select_f <= irom_select_a; + end +end +`endif + +`ifdef CFG_HW_DEBUG_ENABLED +assign jtag_access_complete = (i_cyc_o == `TRUE) && ((i_ack_i == `TRUE) || (i_err_i == `TRUE)) && (jtag_access == `TRUE); +always @(*) +begin + case (jtag_address[1:0]) + 2'b00: jtag_read_data = i_dat_i[`LM32_BYTE_3_RNG]; + 2'b01: jtag_read_data = i_dat_i[`LM32_BYTE_2_RNG]; + 2'b10: jtag_read_data = i_dat_i[`LM32_BYTE_1_RNG]; + 2'b11: jtag_read_data = i_dat_i[`LM32_BYTE_0_RNG]; + endcase +end +`endif + +`ifdef CFG_IWB_ENABLED +// Instruction Wishbone interface +`ifdef CFG_ICACHE_ENABLED +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + i_cyc_o <= `FALSE; + i_stb_o <= `FALSE; + i_adr_o <= {`LM32_WORD_WIDTH{1'b0}}; + i_cti_o <= `LM32_CTYPE_END; + i_lock_o <= `FALSE; + icache_refill_data <= {`LM32_INSTRUCTION_WIDTH{1'b0}}; + icache_refill_ready <= `FALSE; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_f <= `FALSE; +`endif +`ifdef CFG_HW_DEBUG_ENABLED + i_we_o <= `FALSE; + i_sel_o <= 4'b1111; + jtag_access <= `FALSE; +`endif + end + else + begin + icache_refill_ready <= `FALSE; + // Is a cycle in progress? + if (i_cyc_o == `TRUE) + begin + // Has cycle completed? 
+ if ((i_ack_i == `TRUE) || (i_err_i == `TRUE)) + begin +`ifdef CFG_HW_DEBUG_ENABLED + if (jtag_access == `TRUE) + begin + i_cyc_o <= `FALSE; + i_stb_o <= `FALSE; + i_we_o <= `FALSE; + jtag_access <= `FALSE; + end + else +`endif + begin + if (last_word == `TRUE) + begin + // Cache line fill complete + i_cyc_o <= `FALSE; + i_stb_o <= `FALSE; + i_lock_o <= `FALSE; + end + // Fetch next word in cache line + i_adr_o[addr_offset_msb:addr_offset_lsb] <= i_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1; + i_cti_o <= next_cycle_type; + // Write fetched data into instruction cache + icache_refill_ready <= `TRUE; + icache_refill_data <= i_dat_i; + end + end +`ifdef CFG_BUS_ERRORS_ENABLED + if (i_err_i == `TRUE) + begin + bus_error_f <= `TRUE; + $display ("Instruction bus error. Address: %x", i_adr_o); + end +`endif + end + else + begin + if ((icache_refill_request == `TRUE) && (icache_refill_ready == `FALSE)) + begin + // Read first word of cache line +`ifdef CFG_HW_DEBUG_ENABLED + i_sel_o <= 4'b1111; +`endif + i_adr_o <= {first_address, 2'b00}; + i_cyc_o <= `TRUE; + i_stb_o <= `TRUE; + i_cti_o <= first_cycle_type; + //i_lock_o <= `TRUE; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_f <= `FALSE; +`endif + end +`ifdef CFG_HW_DEBUG_ENABLED + else + begin + if ((jtag_read_enable == `TRUE) || (jtag_write_enable == `TRUE)) + begin + case (jtag_address[1:0]) + 2'b00: i_sel_o <= 4'b1000; + 2'b01: i_sel_o <= 4'b0100; + 2'b10: i_sel_o <= 4'b0010; + 2'b11: i_sel_o <= 4'b0001; + endcase + i_adr_o <= jtag_address; + i_dat_o <= {4{jtag_write_data}}; + i_cyc_o <= `TRUE; + i_stb_o <= `TRUE; + i_we_o <= jtag_write_enable; + i_cti_o <= `LM32_CTYPE_END; + jtag_access <= `TRUE; + end + end +`endif +`ifdef CFG_BUS_ERRORS_ENABLED + // Clear bus error when exception taken, otherwise they would be + // continually generated if exception handler is cached +`ifdef CFG_FAST_UNCONDITIONAL_BRANCH + if (branch_taken_x == `TRUE) + bus_error_f <= `FALSE; +`endif + if (branch_taken_m == `TRUE) + bus_error_f <= 
`FALSE; +`endif + end + end +end +`else +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + i_cyc_o <= `FALSE; + i_stb_o <= `FALSE; + i_adr_o <= {`LM32_WORD_WIDTH{1'b0}}; + i_cti_o <= `LM32_CTYPE_END; + i_lock_o <= `FALSE; + wb_data_f <= {`LM32_INSTRUCTION_WIDTH{1'b0}}; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_f <= `FALSE; +`endif + end + else + begin + // Is a cycle in progress? + if (i_cyc_o == `TRUE) + begin + // Has cycle completed? + if((i_ack_i == `TRUE) || (i_err_i == `TRUE)) + begin + // Cycle complete + i_cyc_o <= `FALSE; + i_stb_o <= `FALSE; + // Register fetched instruction + wb_data_f <= i_dat_i; + end +`ifdef CFG_BUS_ERRORS_ENABLED + if (i_err_i == `TRUE) + begin + bus_error_f <= `TRUE; + $display ("Instruction bus error. Address: %x", i_adr_o); + end +`endif + end + else + begin + // Wait for an instruction fetch from an external address + if ( (stall_a == `FALSE) +`ifdef CFG_IROM_ENABLED + && (irom_select_a == `FALSE) +`endif + ) + begin + // Fetch instruction +`ifdef CFG_HW_DEBUG_ENABLED + i_sel_o <= 4'b1111; +`endif + i_adr_o <= {pc_a, 2'b00}; + i_cyc_o <= `TRUE; + i_stb_o <= `TRUE; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_f <= `FALSE; +`endif + end + else + begin + if ( (stall_a == `FALSE) +`ifdef CFG_IROM_ENABLED + && (irom_select_a == `TRUE) +`endif + ) + begin +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_f <= `FALSE; +`endif + end + end + end + end +end +`endif +`endif + +// Instruction register +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + instruction_d <= {`LM32_INSTRUCTION_WIDTH{1'b0}}; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_d <= `FALSE; +`endif + end + else + begin + if (stall_d == `FALSE) + begin + instruction_d <= instruction_f; +`ifdef CFG_BUS_ERRORS_ENABLED + bus_error_d <= bus_error_f; +`endif + end + end +end + +endmodule diff --git a/verilog/lm32/lm32_interrupt.v b/verilog/lm32/lm32_interrupt.v new file mode 100644 index 00000000..41f9a146 --- /dev/null 
+++ b/verilog/lm32/lm32_interrupt.v @@ -0,0 +1,356 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_interrupt.v +// Title : Interrupt logic +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface: holds the IE/IM/IP interrupt CSRs and requests an interrupt exception when an unmasked interrupt is pending and IE is set +///////////////////////////////////////////////////// + +module lm32_interrupt ( + // ----- Inputs ------- + clk_i, + rst_i, + // From external devices + interrupt, + // From
pipeline + stall_x, +`ifdef CFG_DEBUG_ENABLED + non_debug_exception, + debug_exception, +`else + exception, +`endif + eret_q_x, +`ifdef CFG_DEBUG_ENABLED + bret_q_x, +`endif + csr, + csr_write_data, + csr_write_enable, + // ----- Outputs ------- + interrupt_exception, + // To pipeline + csr_read_data + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter interrupts = `CFG_INTERRUPTS; // Number of interrupts + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input [interrupts-1:0] interrupt; // Interrupt pins, active-high (OR-ed straight into IP) + +input stall_x; // Stall X pipeline stage + +`ifdef CFG_DEBUG_ENABLED +input non_debug_exception; // Non-debug related exception has been raised +input debug_exception; // Debug-related exception has been raised +`else +input exception; // Exception has been raised +`endif +input eret_q_x; // Return from exception +`ifdef CFG_DEBUG_ENABLED +input bret_q_x; // Return from breakpoint +`endif + +input [`LM32_CSR_RNG] csr; // CSR read/write index +input [`LM32_WORD_RNG] csr_write_data; // Data to write to specified CSR +input csr_write_enable; // CSR write enable + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output interrupt_exception; // Request to raise an interrupt exception +wire interrupt_exception; + +output [`LM32_WORD_RNG] csr_read_data; // Data read from CSR +reg [`LM32_WORD_RNG] csr_read_data; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +wire [interrupts-1:0] asserted; // Which interrupts are currently being asserted +//pragma attribute asserted preserve_signal true +wire [interrupts-1:0] interrupt_n_exception; + +// Interrupt CSRs + +reg ie; //
Interrupt enable +reg eie; // Exception interrupt enable +`ifdef CFG_DEBUG_ENABLED +reg bie; // Breakpoint interrupt enable +`endif +reg [interrupts-1:0] ip; // Interrupt pending +reg [interrupts-1:0] im; // Interrupt mask + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +// Determine which interrupts have occurred and are unmasked +assign interrupt_n_exception = ip & im; + +// Determine if any unmasked interrupts have occurred +assign interrupt_exception = (|interrupt_n_exception) & ie; + +// Determine which interrupts are currently being asserted (active-high) or are already pending +assign asserted = ip | interrupt; + +wire [`LM32_WORD_RNG] ie_csr_read_data = {{`LM32_WORD_WIDTH-3{1'b0}}, // Declared word-wide; previously an implicit 1-bit net that truncated the value +`ifdef CFG_DEBUG_ENABLED + bie, +`else + 1'b0, +`endif + eie, + ie + }; +wire [`LM32_WORD_RNG] ip_csr_read_data = ip; // Zero-extended to word width +wire [`LM32_WORD_RNG] im_csr_read_data = im; // Zero-extended to word width +generate + if (interrupts > 1) + begin +// CSR read (more than one interrupt: IE, IP and IM are readable) +always @(*) +begin + case (csr) + `LM32_CSR_IE: csr_read_data = {{`LM32_WORD_WIDTH-3{1'b0}}, +`ifdef CFG_DEBUG_ENABLED + bie, +`else + 1'b0, +`endif + eie, + ie + }; + `LM32_CSR_IP: csr_read_data = ip; + `LM32_CSR_IM: csr_read_data = im; + default: csr_read_data = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + end + else + begin +// CSR read (single interrupt: only IE and IP are readable) +always @(*) +begin + case (csr) + `LM32_CSR_IE: csr_read_data = {{`LM32_WORD_WIDTH-3{1'b0}}, +`ifdef CFG_DEBUG_ENABLED + bie, +`else + 1'b0, +`endif + eie, + ie + }; + `LM32_CSR_IP: csr_read_data = ip; + default: csr_read_data = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + end +endgenerate + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +generate + if (interrupts > 1) + begin +// IE, IM, IP - Interrupt Enable, Interrupt Mask and Interrupt Pending CSRs +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + ie <= `FALSE; + eie <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + bie <=
`FALSE; +`endif + im <= {interrupts{1'b0}}; + ip <= {interrupts{1'b0}}; + end + else + begin + // Set IP bit when interrupt line is asserted + ip <= asserted; +`ifdef CFG_DEBUG_ENABLED + if (non_debug_exception == `TRUE) + begin + // Save and then clear interrupt enable + eie <= ie; + ie <= `FALSE; + end + else if (debug_exception == `TRUE) + begin + // Save and then clear interrupt enable + bie <= ie; + ie <= `FALSE; + end +`else + if (exception == `TRUE) + begin + // Save and then clear interrupt enable + eie <= ie; + ie <= `FALSE; + end +`endif + else if (stall_x == `FALSE) + begin + if (eret_q_x == `TRUE) + // Restore interrupt enable + ie <= eie; +`ifdef CFG_DEBUG_ENABLED + else if (bret_q_x == `TRUE) + // Restore interrupt enable + ie <= bie; +`endif + else if (csr_write_enable == `TRUE) + begin + // Handle wcsr write (writing 1 to an IP bit clears that pending bit) + if (csr == `LM32_CSR_IE) + begin + ie <= csr_write_data[0]; + eie <= csr_write_data[1]; +`ifdef CFG_DEBUG_ENABLED + bie <= csr_write_data[2]; +`endif + end + if (csr == `LM32_CSR_IM) + im <= csr_write_data[interrupts-1:0]; + if (csr == `LM32_CSR_IP) + ip <= asserted & ~csr_write_data[interrupts-1:0]; + end + end + end +end + end +else + begin +// IE, IP - Interrupt Enable and Interrupt Pending CSRs. NOTE(review): im is neither reset nor written in this branch although interrupt_n_exception still reads it - confirm this is intended +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + ie <= `FALSE; + eie <= `FALSE; +`ifdef CFG_DEBUG_ENABLED + bie <= `FALSE; +`endif + ip <= {interrupts{1'b0}}; + end + else + begin + // Set IP bit when interrupt line is asserted + ip <= asserted; +`ifdef CFG_DEBUG_ENABLED + if (non_debug_exception == `TRUE) + begin + // Save and then clear interrupt enable + eie <= ie; + ie <= `FALSE; + end + else if (debug_exception == `TRUE) + begin + // Save and then clear interrupt enable + bie <= ie; + ie <= `FALSE; + end +`else + if (exception == `TRUE) + begin + // Save and then clear interrupt enable + eie <= ie; + ie <= `FALSE; + end +`endif + else if (stall_x == `FALSE) + begin + if (eret_q_x ==
`TRUE) + // Restore interrupt enable + ie <= eie; +`ifdef CFG_DEBUG_ENABLED + else if (bret_q_x == `TRUE) + // Restore interrupt enable + ie <= bie; +`endif + else if (csr_write_enable == `TRUE) + begin + // Handle wcsr write (writing 1 to an IP bit clears that pending bit) + if (csr == `LM32_CSR_IE) + begin + ie <= csr_write_data[0]; + eie <= csr_write_data[1]; +`ifdef CFG_DEBUG_ENABLED + bie <= csr_write_data[2]; +`endif + end + if (csr == `LM32_CSR_IP) + ip <= asserted & ~csr_write_data[interrupts-1:0]; + end + end + end +end + end +endgenerate + +endmodule + diff --git a/verilog/lm32/lm32_jtag.v b/verilog/lm32/lm32_jtag.v new file mode 100644 index 00000000..1904ccbf --- /dev/null +++ b/verilog/lm32/lm32_jtag.v @@ -0,0 +1,498 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods.
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_jtag.v +// Title : JTAG interface +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +`ifdef CFG_JTAG_ENABLED + +`define LM32_DP 3'b000 +`define LM32_TX 3'b001 +`define LM32_RX 3'b010 + +// LM32 Debug Protocol command IDs (carried in bits 7:4 of the command byte) +`define LM32_DP_RNG 3:0 +`define LM32_DP_READ_MEMORY 4'b0001 +`define LM32_DP_WRITE_MEMORY 4'b0010 +`define LM32_DP_READ_SEQUENTIAL 4'b0011 +`define LM32_DP_WRITE_SEQUENTIAL 4'b0100 +`define LM32_DP_WRITE_CSR 4'b0101 +`define LM32_DP_BREAK 4'b0110 +`define LM32_DP_RESET 4'b0111 + +// States for FSM +`define LM32_JTAG_STATE_RNG 3:0 +`define LM32_JTAG_STATE_READ_COMMAND 4'h0 +`define LM32_JTAG_STATE_READ_BYTE_0 4'h1 +`define LM32_JTAG_STATE_READ_BYTE_1 4'h2 +`define LM32_JTAG_STATE_READ_BYTE_2 4'h3 +`define LM32_JTAG_STATE_READ_BYTE_3 4'h4 +`define LM32_JTAG_STATE_READ_BYTE_4 4'h5 +`define LM32_JTAG_STATE_PROCESS_COMMAND 4'h6 +`define LM32_JTAG_STATE_WAIT_FOR_MEMORY 4'h7 +`define LM32_JTAG_STATE_WAIT_FOR_CSR 4'h8 + +///////////////////////////////////////////////////// +// Module interface: decodes bytes written over JTAG into debug-protocol commands (memory/CSR access, break, reset) and JTAG UART transfers +///////////////////////////////////////////////////// + +module lm32_jtag ( + // ----- Inputs ------- + clk_i, + rst_i, + jtag_clk, + jtag_update, + jtag_reg_q, + jtag_reg_addr_q, +`ifdef CFG_JTAG_UART_ENABLED + csr, + csr_write_enable, + csr_write_data, + stall_x, +`endif +`ifdef
CFG_HW_DEBUG_ENABLED + jtag_read_data, + jtag_access_complete, +`endif +`ifdef CFG_DEBUG_ENABLED + exception_q_w, +`endif + // ----- Outputs ------- +`ifdef CFG_JTAG_UART_ENABLED + jtx_csr_read_data, + jrx_csr_read_data, +`endif +`ifdef CFG_HW_DEBUG_ENABLED + jtag_csr_write_enable, + jtag_csr_write_data, + jtag_csr, + jtag_read_enable, + jtag_write_enable, + jtag_write_data, + jtag_address, +`endif +`ifdef CFG_DEBUG_ENABLED + jtag_break, + jtag_reset, +`endif + jtag_reg_d, + jtag_reg_addr_d + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input jtag_clk; // JTAG clock (not referenced inside this module) +input jtag_update; // JTAG data register has been updated +input [`LM32_BYTE_RNG] jtag_reg_q; // JTAG data register +input [2:0] jtag_reg_addr_q; // JTAG register address (selects LM32_DP, LM32_TX or LM32_RX) + +`ifdef CFG_JTAG_UART_ENABLED +input [`LM32_CSR_RNG] csr; // CSR to write +input csr_write_enable; // CSR write enable +input [`LM32_WORD_RNG] csr_write_data; // Data to write to specified CSR +input stall_x; // Stall instruction in X stage +`endif +`ifdef CFG_HW_DEBUG_ENABLED +input [`LM32_BYTE_RNG] jtag_read_data; // Data read from requested address +input jtag_access_complete; // Memory access is complete +`endif +`ifdef CFG_DEBUG_ENABLED +input exception_q_w; // Indicates an exception has occurred in W stage +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +`ifdef CFG_JTAG_UART_ENABLED +output [`LM32_WORD_RNG] jtx_csr_read_data; // Value of JTX CSR for rcsr instructions +wire [`LM32_WORD_RNG] jtx_csr_read_data; +output [`LM32_WORD_RNG] jrx_csr_read_data; // Value of JRX CSR for rcsr instructions +wire [`LM32_WORD_RNG] jrx_csr_read_data; +`endif +`ifdef CFG_HW_DEBUG_ENABLED +output jtag_csr_write_enable; // CSR write enable +reg jtag_csr_write_enable; +output [`LM32_WORD_RNG] jtag_csr_write_data; // Data to write to
specified CSR +wire [`LM32_WORD_RNG] jtag_csr_write_data; +output [`LM32_CSR_RNG] jtag_csr; // CSR to write +wire [`LM32_CSR_RNG] jtag_csr; +output jtag_read_enable; // Memory read enable +reg jtag_read_enable; +output jtag_write_enable; // Memory write enable +reg jtag_write_enable; +output [`LM32_BYTE_RNG] jtag_write_data; // Data to write to specified address +wire [`LM32_BYTE_RNG] jtag_write_data; +output [`LM32_WORD_RNG] jtag_address; // Memory read/write address +wire [`LM32_WORD_RNG] jtag_address; +`endif +`ifdef CFG_DEBUG_ENABLED +output jtag_break; // Request to raise a breakpoint exception +reg jtag_break; +output jtag_reset; // Request to raise a reset exception +reg jtag_reset; +`endif +output [`LM32_BYTE_RNG] jtag_reg_d; // Byte returned over JTAG: memory read data or UART TX byte +reg [`LM32_BYTE_RNG] jtag_reg_d; +output [2:0] jtag_reg_addr_d; // Status flags returned over JTAG: {processing, uart_rx_valid, uart_tx_valid} +wire [2:0] jtag_reg_addr_d; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +reg rx_toggle; // Clock-domain crossing registers +reg rx_toggle_r; // Registered version of rx_toggle +reg rx_toggle_r_r; // Registered version of rx_toggle_r +reg rx_toggle_r_r_r; // Registered version of rx_toggle_r_r + +reg [`LM32_BYTE_RNG] rx_byte; +reg [2:0] rx_addr; + +`ifdef CFG_JTAG_UART_ENABLED +reg [`LM32_BYTE_RNG] uart_tx_byte; // UART TX data +reg uart_tx_valid; // TX data is valid +reg [`LM32_BYTE_RNG] uart_rx_byte; // UART RX data +reg uart_rx_valid; // RX data is valid +`endif + +reg [`LM32_DP_RNG] command; // The last received command +`ifdef CFG_HW_DEBUG_ENABLED +reg [`LM32_BYTE_RNG] jtag_byte_0; // Registers to hold command parameters +reg [`LM32_BYTE_RNG] jtag_byte_1; +reg [`LM32_BYTE_RNG] jtag_byte_2; +reg [`LM32_BYTE_RNG] jtag_byte_3; +reg [`LM32_BYTE_RNG] jtag_byte_4; +reg processing; // Indicates if we're still processing a memory read/write or CSR write +`endif + +reg [`LM32_JTAG_STATE_RNG] state; // Current state of FSM + +///////////////////////////////////////////////////// +//
Combinational Logic +///////////////////////////////////////////////////// + +`ifdef CFG_HW_DEBUG_ENABLED +assign jtag_csr_write_data = {jtag_byte_0, jtag_byte_1, jtag_byte_2, jtag_byte_3}; +assign jtag_csr = jtag_byte_4[`LM32_CSR_RNG]; +assign jtag_address = {jtag_byte_0, jtag_byte_1, jtag_byte_2, jtag_byte_3}; +assign jtag_write_data = jtag_byte_4; +`endif + +// Generate status flags for reading via the JTAG interface +`ifdef CFG_JTAG_UART_ENABLED +assign jtag_reg_addr_d[1:0] = {uart_rx_valid, uart_tx_valid}; +`else +assign jtag_reg_addr_d[1:0] = 2'b00; +`endif +`ifdef CFG_HW_DEBUG_ENABLED +assign jtag_reg_addr_d[2] = processing; +`else +assign jtag_reg_addr_d[2] = 1'b0; +`endif + +`ifdef CFG_JTAG_UART_ENABLED +assign jtx_csr_read_data = {{`LM32_WORD_WIDTH-9{1'b0}}, uart_tx_valid, 8'h00}; +assign jrx_csr_read_data = {{`LM32_WORD_WIDTH-9{1'b0}}, uart_rx_valid, uart_rx_byte}; +`endif + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// Toggle a flag when a JTAG write occurs (on each falling edge of jtag_update) + +always @(negedge jtag_update `CFG_RESET_SENSITIVITY) +begin +if (rst_i == `TRUE) + rx_toggle <= 1'b0; +else + rx_toggle <= ~rx_toggle; +end + +always @(*) +begin + rx_byte = jtag_reg_q; + rx_addr = jtag_reg_addr_q; +end + +// Clock domain crossing from JTAG clock domain to CPU clock domain (rx_toggle_r_r != rx_toggle_r_r_r marks a newly written byte) +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + rx_toggle_r <= 1'b0; + rx_toggle_r_r <= 1'b0; + rx_toggle_r_r_r <= 1'b0; + end + else + begin + rx_toggle_r <= rx_toggle; + rx_toggle_r_r <= rx_toggle_r; + rx_toggle_r_r_r <= rx_toggle_r_r; + end +end + +// LM32 debug protocol state machine (also handles JTX/JRX CSR writes and clears break/reset requests on an exception) +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + state <= `LM32_JTAG_STATE_READ_COMMAND; + command <= 4'b0000; + jtag_reg_d <= 8'h00; +`ifdef CFG_HW_DEBUG_ENABLED + processing <= `FALSE; + jtag_csr_write_enable <= `FALSE; + jtag_read_enable <= `FALSE; +
jtag_write_enable <= `FALSE; +`endif +`ifdef CFG_DEBUG_ENABLED + jtag_break <= `FALSE; + jtag_reset <= `FALSE; +`endif +`ifdef CFG_JTAG_UART_ENABLED + uart_tx_byte <= 8'h00; + uart_tx_valid <= `FALSE; + uart_rx_byte <= 8'h00; + uart_rx_valid <= `FALSE; +`endif + end + else + begin +`ifdef CFG_JTAG_UART_ENABLED + if ((csr_write_enable == `TRUE) && (stall_x == `FALSE)) + begin + case (csr) + `LM32_CSR_JTX: + begin + // Set flag indicating data is available + uart_tx_byte <= csr_write_data[`LM32_BYTE_0_RNG]; + uart_tx_valid <= `TRUE; + end + `LM32_CSR_JRX: + begin + // Clear flag indicating data has been received + uart_rx_valid <= `FALSE; + end + endcase + end +`endif +`ifdef CFG_DEBUG_ENABLED + // When an exception has occurred, clear the requests + if (exception_q_w == `TRUE) + begin + jtag_break <= `FALSE; + jtag_reset <= `FALSE; + end +`endif + case (state) + `LM32_JTAG_STATE_READ_COMMAND: + begin + // Wait for rx register to toggle which indicates new data is available + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + command <= rx_byte[7:4]; + case (rx_addr) +`ifdef CFG_DEBUG_ENABLED + `LM32_DP: + begin + case (rx_byte[7:4]) +`ifdef CFG_HW_DEBUG_ENABLED + `LM32_DP_READ_MEMORY: + state <= `LM32_JTAG_STATE_READ_BYTE_0; + `LM32_DP_READ_SEQUENTIAL: + begin + {jtag_byte_2, jtag_byte_3} <= {jtag_byte_2, jtag_byte_3} + 1'b1; // Only the low 16 address bits are incremented + state <= `LM32_JTAG_STATE_PROCESS_COMMAND; + end + `LM32_DP_WRITE_MEMORY: + state <= `LM32_JTAG_STATE_READ_BYTE_0; + `LM32_DP_WRITE_SEQUENTIAL: + begin + {jtag_byte_2, jtag_byte_3} <= {jtag_byte_2, jtag_byte_3} + 1'b1; // Only the low 16 address bits are incremented + state <= `LM32_JTAG_STATE_READ_BYTE_4; // Address bytes are reused; only the data byte is still to come + end + `LM32_DP_WRITE_CSR: + state <= `LM32_JTAG_STATE_READ_BYTE_0; +`endif + `LM32_DP_BREAK: + begin +`ifdef CFG_JTAG_UART_ENABLED + uart_rx_valid <= `FALSE; + uart_tx_valid <= `FALSE; +`endif + jtag_break <= `TRUE; + end + `LM32_DP_RESET: + begin +`ifdef CFG_JTAG_UART_ENABLED + uart_rx_valid <= `FALSE; + uart_tx_valid <= `FALSE; +`endif + jtag_reset <= `TRUE; + end + endcase + end +`endif +`ifdef
CFG_JTAG_UART_ENABLED + `LM32_TX: // Byte written by the JTAG side becomes the UART RX data + begin + uart_rx_byte <= rx_byte; + uart_rx_valid <= `TRUE; + end + `LM32_RX: // Hand the pending UART TX byte to the JTAG side + begin + jtag_reg_d <= uart_tx_byte; + uart_tx_valid <= `FALSE; + end +`endif + default: + ; + endcase + end + end +`ifdef CFG_HW_DEBUG_ENABLED + `LM32_JTAG_STATE_READ_BYTE_0: + begin + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + jtag_byte_0 <= rx_byte; + state <= `LM32_JTAG_STATE_READ_BYTE_1; + end + end + `LM32_JTAG_STATE_READ_BYTE_1: + begin + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + jtag_byte_1 <= rx_byte; + state <= `LM32_JTAG_STATE_READ_BYTE_2; + end + end + `LM32_JTAG_STATE_READ_BYTE_2: + begin + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + jtag_byte_2 <= rx_byte; + state <= `LM32_JTAG_STATE_READ_BYTE_3; + end + end + `LM32_JTAG_STATE_READ_BYTE_3: + begin + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + jtag_byte_3 <= rx_byte; + if (command == `LM32_DP_READ_MEMORY) + state <= `LM32_JTAG_STATE_PROCESS_COMMAND; + else + state <= `LM32_JTAG_STATE_READ_BYTE_4; + end + end + `LM32_JTAG_STATE_READ_BYTE_4: + begin + if (rx_toggle_r_r != rx_toggle_r_r_r) + begin + jtag_byte_4 <= rx_byte; + state <= `LM32_JTAG_STATE_PROCESS_COMMAND; + end + end + `LM32_JTAG_STATE_PROCESS_COMMAND: + begin + case (command) + `LM32_DP_READ_MEMORY, + `LM32_DP_READ_SEQUENTIAL: + begin + jtag_read_enable <= `TRUE; + processing <= `TRUE; + state <= `LM32_JTAG_STATE_WAIT_FOR_MEMORY; + end + `LM32_DP_WRITE_MEMORY, + `LM32_DP_WRITE_SEQUENTIAL: + begin + jtag_write_enable <= `TRUE; + processing <= `TRUE; + state <= `LM32_JTAG_STATE_WAIT_FOR_MEMORY; + end + `LM32_DP_WRITE_CSR: + begin + jtag_csr_write_enable <= `TRUE; + processing <= `TRUE; + state <= `LM32_JTAG_STATE_WAIT_FOR_CSR; + end + endcase + end + `LM32_JTAG_STATE_WAIT_FOR_MEMORY: + begin + if (jtag_access_complete == `TRUE) + begin + jtag_read_enable <= `FALSE; + jtag_reg_d <= jtag_read_data; + jtag_write_enable <= `FALSE; + processing <= `FALSE; + state <= `LM32_JTAG_STATE_READ_COMMAND; + end + end +
`LM32_JTAG_STATE_WAIT_FOR_CSR: // jtag_csr_write_enable is high for exactly one clk_i cycle + begin + jtag_csr_write_enable <= `FALSE; + processing <= `FALSE; + state <= `LM32_JTAG_STATE_READ_COMMAND; + end +`endif + endcase + end +end + +endmodule + +`endif diff --git a/verilog/lm32/lm32_load_store_unit.v b/verilog/lm32/lm32_load_store_unit.v new file mode 100644 index 00000000..4a86e7b0 --- /dev/null +++ b/verilog/lm32/lm32_load_store_unit.v @@ -0,0 +1,829 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods.
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_load_store_unit.v +// Title : Load and store unit +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : Instead of disallowing an instruction cache miss on a data cache +// : miss, both can now occur at the same time. If both occur at same +// : time, then restart address is the address of instruction that +// : caused data cache miss. +// Version : 3.2 +// : EBRs use SYNC resets instead of ASYNC resets. +// Version : 3.3 +// : Support for new non-cacheable Data Memory that is accessible by +// : the data port and has a one cycle access latency. 
+// Version : 3.4 +// : No change +// Version : 3.5 +// : Bug fix: Inline memory is correctly generated if it is not a +// : power-of-two +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_load_store_unit ( + // ----- Inputs ------- + clk_i, + rst_i, + // From pipeline + stall_a, + stall_x, + stall_m, + kill_x, + kill_m, + exception_m, + store_operand_x, + load_store_address_x, + load_store_address_m, + load_store_address_w, + load_x, + store_x, + load_q_x, + store_q_x, + load_q_m, + store_q_m, + sign_extend_x, + size_x, +`ifdef CFG_DCACHE_ENABLED + dflush, +`endif +`ifdef CFG_IROM_ENABLED + irom_data_m, +`endif + // From Wishbone + d_dat_i, + d_ack_i, + d_err_i, + d_rty_i, + // ----- Outputs ------- + // To pipeline +`ifdef CFG_DCACHE_ENABLED + dcache_refill_request, + dcache_restart_request, + dcache_stall_request, + dcache_refilling, +`endif +`ifdef CFG_IROM_ENABLED + irom_store_data_m, + irom_address_xm, + irom_we_xm, + irom_stall_request_x, +`endif + load_data_w, + stall_wb_load, + // To Wishbone + d_dat_o, + d_adr_o, + d_cyc_o, + d_sel_o, + d_stb_o, + d_we_o, + d_cti_o, + d_lock_o, + d_bte_o + ); + +///////////////////////////////////////////////////// +// Parameters +///////////////////////////////////////////////////// + +parameter associativity = 1; // Associativity of the cache (Number of ways) +parameter sets = 512; // Number of sets +parameter bytes_per_line = 16; // Number of bytes per cache line +parameter base_address = 0; // Base address of cachable memory +parameter limit = 0; // Limit (highest address) of cachable memory + +// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used +localparam addr_offset_width = bytes_per_line == 4 ? 
1 : clogb2(bytes_per_line)-1-2; +localparam addr_offset_lsb = 2; +localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +input stall_a; // A stage stall +input stall_x; // X stage stall +input stall_m; // M stage stall +input kill_x; // Kill instruction in X stage +input kill_m; // Kill instruction in M stage +input exception_m; // An exception occured in the M stage + +input [`LM32_WORD_RNG] store_operand_x; // Data read from register to store +input [`LM32_WORD_RNG] load_store_address_x; // X stage load/store address +input [`LM32_WORD_RNG] load_store_address_m; // M stage load/store address +input [1:0] load_store_address_w; // W stage load/store address (only least two significant bits are needed) +input load_x; // Load instruction in X stage +input store_x; // Store instruction in X stage +input load_q_x; // Load instruction in X stage +input store_q_x; // Store instruction in X stage +input load_q_m; // Load instruction in M stage +input store_q_m; // Store instruction in M stage +input sign_extend_x; // Whether load instruction in X stage should sign extend or zero extend +input [`LM32_SIZE_RNG] size_x; // Size of load or store (byte, hword, word) + +`ifdef CFG_DCACHE_ENABLED +input dflush; // Flush the data cache +`endif + +`ifdef CFG_IROM_ENABLED +input [`LM32_WORD_RNG] irom_data_m; // Data from Instruction-ROM +`endif + +input [`LM32_WORD_RNG] d_dat_i; // Data Wishbone interface read data +input d_ack_i; // Data Wishbone interface acknowledgement +input d_err_i; // Data Wishbone interface error +input d_rty_i; // Data Wishbone interface retry + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +`ifdef CFG_DCACHE_ENABLED +output dcache_refill_request; // Request to refill data cache +wire 
dcache_refill_request; +output dcache_restart_request; // Request to restart the instruction that caused a data cache miss +wire dcache_restart_request; +output dcache_stall_request; // Data cache stall request +wire dcache_stall_request; +output dcache_refilling; +wire dcache_refilling; +`endif + +`ifdef CFG_IROM_ENABLED +output [`LM32_WORD_RNG] irom_store_data_m; // Store data to Instruction ROM (full word; range must match the wire declaration below) +wire [`LM32_WORD_RNG] irom_store_data_m; +output [`LM32_WORD_RNG] irom_address_xm; // Load/store address to Instruction ROM +wire [`LM32_WORD_RNG] irom_address_xm; +output irom_we_xm; // Write-enable of 2nd port of Instruction ROM +wire irom_we_xm; +output irom_stall_request_x; // Stall instruction in D stage +wire irom_stall_request_x; +`endif + +output [`LM32_WORD_RNG] load_data_w; // Result of a load instruction +reg [`LM32_WORD_RNG] load_data_w; +output stall_wb_load; // Request to stall pipeline due to a load from the Wishbone interface +reg stall_wb_load; + +output [`LM32_WORD_RNG] d_dat_o; // Data Wishbone interface write data +reg [`LM32_WORD_RNG] d_dat_o; +output [`LM32_WORD_RNG] d_adr_o; // Data Wishbone interface address +reg [`LM32_WORD_RNG] d_adr_o; +output d_cyc_o; // Data Wishbone interface cycle +reg d_cyc_o; +output [`LM32_BYTE_SELECT_RNG] d_sel_o; // Data Wishbone interface byte select +reg [`LM32_BYTE_SELECT_RNG] d_sel_o; +output d_stb_o; // Data Wishbone interface strobe +reg d_stb_o; +output d_we_o; // Data Wishbone interface write enable +reg d_we_o; +output [`LM32_CTYPE_RNG] d_cti_o; // Data Wishbone interface cycle type +reg [`LM32_CTYPE_RNG] d_cti_o; +output d_lock_o; // Data Wishbone interface lock bus +reg d_lock_o; +output [`LM32_BTYPE_RNG] d_bte_o; // Data Wishbone interface burst type +wire [`LM32_BTYPE_RNG] d_bte_o; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +// Microcode pipeline registers - See inputs for description +reg [`LM32_SIZE_RNG] size_m; +reg
[`LM32_SIZE_RNG] size_w; +reg sign_extend_m; +reg sign_extend_w; +reg [`LM32_WORD_RNG] store_data_x; +reg [`LM32_WORD_RNG] store_data_m; +reg [`LM32_BYTE_SELECT_RNG] byte_enable_x; +reg [`LM32_BYTE_SELECT_RNG] byte_enable_m; +wire [`LM32_WORD_RNG] data_m; +reg [`LM32_WORD_RNG] data_w; + +`ifdef CFG_DCACHE_ENABLED +wire dcache_select_x; // Select data cache to load from / store to +reg dcache_select_m; +wire [`LM32_WORD_RNG] dcache_data_m; // Data read from cache +wire [`LM32_WORD_RNG] dcache_refill_address; // Address to refill data cache from +reg dcache_refill_ready; // Indicates the next word of refill data is ready +wire [`LM32_CTYPE_RNG] first_cycle_type; // First Wishbone cycle type +wire [`LM32_CTYPE_RNG] next_cycle_type; // Next Wishbone cycle type +wire last_word; // Indicates if this is the last word in the cache line +wire [`LM32_WORD_RNG] first_address; // First cache refill address +`endif +`ifdef CFG_DRAM_ENABLED +wire dram_select_x; // Select data RAM to load from / store to +reg dram_select_m; +reg dram_bypass_en; // RAW in data RAM; read latched (bypass) value rather than value from memory +reg [`LM32_WORD_RNG] dram_bypass_data; // Latched value of store'd data to data RAM +wire [`LM32_WORD_RNG] dram_data_out; // Data read from data RAM +wire [`LM32_WORD_RNG] dram_data_m; // Data read from data RAM: bypass value or value from memory +wire [`LM32_WORD_RNG] dram_store_data_m; // Data to write to RAM +`endif +wire wb_select_x; // Select Wishbone to load from / store to +`ifdef CFG_IROM_ENABLED +wire irom_select_x; // Select instruction ROM to load from / store to +reg irom_select_m; +`endif +reg wb_select_m; +reg [`LM32_WORD_RNG] wb_data_m; // Data read from Wishbone +reg wb_load_complete; // Indicates when a Wishbone load is complete + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" + +///////////////////////////////////////////////////// +// 
Instantiations +///////////////////////////////////////////////////// + +`ifdef CFG_DRAM_ENABLED + // Data RAM + pmi_ram_dp_true + #( + // ----- Parameters ------- + .pmi_family (`LATTICE_FAMILY), + + //.pmi_addr_depth_a (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)), + //.pmi_addr_width_a ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)), + //.pmi_data_width_a (`LM32_WORD_WIDTH), + //.pmi_addr_depth_b (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)), + //.pmi_addr_width_b ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)), + //.pmi_data_width_b (`LM32_WORD_WIDTH), + + .pmi_addr_depth_a (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1), + .pmi_addr_width_a (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)), + .pmi_data_width_a (`LM32_WORD_WIDTH), + .pmi_addr_depth_b (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1), + .pmi_addr_width_b (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)), + .pmi_data_width_b (`LM32_WORD_WIDTH), + + .pmi_regmode_a ("noreg"), + .pmi_regmode_b ("noreg"), + .pmi_gsr ("enable"), + .pmi_resetmode ("sync"), + .pmi_init_file (`CFG_DRAM_INIT_FILE), + .pmi_init_file_format (`CFG_DRAM_INIT_FILE_FORMAT), + .module_type ("pmi_ram_dp_true") + ) + ram ( + // ----- Inputs ------- + .ClockA (clk_i), + .ClockB (clk_i), + .ResetA (rst_i), + .ResetB (rst_i), + .DataInA ({32{1'b0}}), + .DataInB (dram_store_data_m), + .AddressA (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]), + .AddressB (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]), + // .ClockEnA (!stall_x & (load_x | store_x)), + .ClockEnA (!stall_x), + .ClockEnB (!stall_m), + .WrA (`FALSE), + .WrB (store_q_m & dram_select_m), + // ----- Outputs ------- + .QA (dram_data_out), + .QB () + ); + + /*---------------------------------------------------------------------- + EBRs cannot perform reads from location 'written to' on the same clock + edge. 
Therefore bypass logic is required to latch the store'd value + and use it for the load (instead of value from memory). + ----------------------------------------------------------------------*/ + always @(posedge clk_i `CFG_RESET_SENSITIVITY) + if (rst_i == `TRUE) + begin + dram_bypass_en <= `FALSE; + dram_bypass_data <= 0; + end + else + begin + if (stall_x == `FALSE) + dram_bypass_data <= dram_store_data_m; + + if ( (stall_m == `FALSE) + && (stall_x == `FALSE) + && (store_q_m == `TRUE) + && ( (load_x == `TRUE) + || (store_x == `TRUE) + ) + && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2]) + ) + dram_bypass_en <= `TRUE; + else + if ( (dram_bypass_en == `TRUE) + && (stall_x == `FALSE) + ) + dram_bypass_en <= `FALSE; + end + + assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out; +`endif + +`ifdef CFG_DCACHE_ENABLED +// Data cache +lm32_dcache #( + .associativity (associativity), + .sets (sets), + .bytes_per_line (bytes_per_line), + .base_address (base_address), + .limit (limit) + ) dcache ( + // ----- Inputs ----- + .clk_i (clk_i), + .rst_i (rst_i), + .stall_a (stall_a), + .stall_x (stall_x), + .stall_m (stall_m), + .address_x (load_store_address_x), + .address_m (load_store_address_m), + .load_q_m (load_q_m & dcache_select_m), + .store_q_m (store_q_m & dcache_select_m), + .store_data (store_data_m), + .store_byte_select (byte_enable_m & {4{dcache_select_m}}), + .refill_ready (dcache_refill_ready), + .refill_data (wb_data_m), + .dflush (dflush), + // ----- Outputs ----- + .stall_request (dcache_stall_request), + .restart_request (dcache_restart_request), + .refill_request (dcache_refill_request), + .refill_address (dcache_refill_address), + .refilling (dcache_refilling), + .load_data (dcache_data_m) + ); +`endif + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +// Select where data should be loaded from / stored 
to +`ifdef CFG_DRAM_ENABLED + assign dram_select_x = (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS) + && (load_store_address_x <= `CFG_DRAM_LIMIT); +`endif + +`ifdef CFG_IROM_ENABLED + assign irom_select_x = (load_store_address_x >= `CFG_IROM_BASE_ADDRESS) + && (load_store_address_x <= `CFG_IROM_LIMIT); +`endif + +`ifdef CFG_DCACHE_ENABLED + assign dcache_select_x = (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS) + && (load_store_address_x <= `CFG_DCACHE_LIMIT) +`ifdef CFG_DRAM_ENABLED + && (dram_select_x == `FALSE) +`endif +`ifdef CFG_IROM_ENABLED + && (irom_select_x == `FALSE) +`endif + ; +`endif + + assign wb_select_x = `TRUE +`ifdef CFG_DCACHE_ENABLED + && !dcache_select_x +`endif +`ifdef CFG_DRAM_ENABLED + && !dram_select_x +`endif +`ifdef CFG_IROM_ENABLED + && !irom_select_x +`endif + ; + +// Replicate the store data across the word so it is present in whichever byte lane(s) get enabled +always @(*) +begin + case (size_x) + `LM32_SIZE_BYTE: store_data_x = {4{store_operand_x[7:0]}}; + `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}}; + `LM32_SIZE_WORD: store_data_x = store_operand_x; + default: store_data_x = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + +// Generate byte enable according to size of load or store and address being accessed (byte offset 0 enables byte_enable_x[3], offset 3 enables byte_enable_x[0]) +always @(*) +begin + casez ({size_x, load_store_address_x[1:0]}) + {`LM32_SIZE_BYTE, 2'b11}: byte_enable_x = 4'b0001; + {`LM32_SIZE_BYTE, 2'b10}: byte_enable_x = 4'b0010; + {`LM32_SIZE_BYTE, 2'b01}: byte_enable_x = 4'b0100; + {`LM32_SIZE_BYTE, 2'b00}: byte_enable_x = 4'b1000; + {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011; + {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100; + {`LM32_SIZE_WORD, 2'b??}: byte_enable_x = 4'b1111; + default: byte_enable_x = 4'bxxxx; + endcase +end + +`ifdef CFG_DRAM_ENABLED +// Only replace selected bytes; unselected bytes are written back with the value read from data RAM +assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG]; +assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ?
store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG]; +assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG]; +assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG]; +`endif + +`ifdef CFG_IROM_ENABLED +// Only replace selected bytes +assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG]; +assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG]; +assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG]; +assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG]; +`endif + +`ifdef CFG_IROM_ENABLED + // Instead of implementing a byte-addressable instruction ROM (for store byte instruction), + // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite + // byte is replaced, and the whole 32-bit value is written back + + assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE)) + ? load_store_address_m + : load_store_address_x; + + // All store instructions perform a write operation in the M stage + assign irom_we_xm = (irom_select_m == `TRUE) + && (store_q_m == `TRUE); + + // A single port in instruction ROM is available to load-store unit for doing loads/stores. 
+ // Since every store requires a load (in X stage) and then a store (in M stage), we cannot + // allow load (or store) instructions sequentially after the store instructions to proceed + // until the store instruction has vacated M stage (i.e., completed the store operation) + assign irom_stall_request_x = (irom_select_x == `TRUE) + && (store_q_x == `TRUE); +`endif + +`ifdef CFG_DCACHE_ENABLED + `ifdef CFG_DRAM_ENABLED + `ifdef CFG_IROM_ENABLED + // WB + DC + DRAM + IROM + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : dram_select_m == `TRUE + ? dram_data_m + : irom_select_m == `TRUE + ? irom_data_m + : dcache_data_m; + `else + // WB + DC + DRAM + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : dram_select_m == `TRUE + ? dram_data_m + : dcache_data_m; + `endif + `else + `ifdef CFG_IROM_ENABLED + // WB + DC + IROM + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : irom_select_m == `TRUE + ? irom_data_m + : dcache_data_m; + `else + // WB + DC + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : dcache_data_m; + `endif + `endif +`else + `ifdef CFG_DRAM_ENABLED + `ifdef CFG_IROM_ENABLED + // WB + DRAM + IROM + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : dram_select_m == `TRUE + ? dram_data_m + : irom_data_m; + `else + // WB + DRAM + assign data_m = wb_select_m == `TRUE + ? wb_data_m + : dram_data_m; + `endif + `else + `ifdef CFG_IROM_ENABLED + // WB + IROM + assign data_m = wb_select_m == `TRUE + ? 
wb_data_m + : irom_data_m; + `else + // WB + assign data_m = wb_data_m; + `endif + `endif +`endif + +// Sub-word selection and sign/zero-extension for loads +always @(*) +begin + casez ({size_w, load_store_address_w[1:0]}) + {`LM32_SIZE_BYTE, 2'b11}: load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]}; + {`LM32_SIZE_BYTE, 2'b10}: load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]}; + {`LM32_SIZE_BYTE, 2'b01}: load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]}; + {`LM32_SIZE_BYTE, 2'b00}: load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]}; + {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]}; + {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]}; + {`LM32_SIZE_WORD, 2'b??}: load_data_w = data_w; + default: load_data_w = {`LM32_WORD_WIDTH{1'bx}}; + endcase +end + +// Unused/constant Wishbone signals +assign d_bte_o = `LM32_BTYPE_LINEAR; + +`ifdef CFG_DCACHE_ENABLED +// Generate signal to indicate last word in cache line +generate + case (bytes_per_line) + 4: + begin +assign first_cycle_type = `LM32_CTYPE_END; +assign next_cycle_type = `LM32_CTYPE_END; +assign last_word = `TRUE; +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00}; + end + 8: + begin +assign first_cycle_type = `LM32_CTYPE_INCREMENTING; +assign next_cycle_type = `LM32_CTYPE_END; +assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; + end + 16: + begin +assign first_cycle_type = `LM32_CTYPE_INCREMENTING; +assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? 
`LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING; +assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1; +assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00}; + end + endcase +endgenerate +`endif + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// Data Wishbone interface +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + d_cyc_o <= `FALSE; + d_stb_o <= `FALSE; + d_dat_o <= {`LM32_WORD_WIDTH{1'b0}}; + d_adr_o <= {`LM32_WORD_WIDTH{1'b0}}; + d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}}; + d_we_o <= `FALSE; + d_cti_o <= `LM32_CTYPE_END; + d_lock_o <= `FALSE; + wb_data_m <= {`LM32_WORD_WIDTH{1'b0}}; + wb_load_complete <= `FALSE; + stall_wb_load <= `FALSE; +`ifdef CFG_DCACHE_ENABLED + dcache_refill_ready <= `FALSE; +`endif + end + else + begin +`ifdef CFG_DCACHE_ENABLED + // Refill ready should only be asserted for a single cycle + dcache_refill_ready <= `FALSE; +`endif + // Is a Wishbone cycle already in progress? + if (d_cyc_o == `TRUE) + begin + // Is the cycle complete? 
+ if ((d_ack_i == `TRUE) || (d_err_i == `TRUE)) + begin +`ifdef CFG_DCACHE_ENABLED + if ((dcache_refilling == `TRUE) && (!last_word)) + begin + // Fetch next word of cache line + d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1; + end + else +`endif + begin + // Refill/access complete + d_cyc_o <= `FALSE; + d_stb_o <= `FALSE; + d_lock_o <= `FALSE; + end +`ifdef CFG_DCACHE_ENABLED + d_cti_o <= next_cycle_type; + // If we are performing a refill, indicate to cache next word of data is ready + dcache_refill_ready <= dcache_refilling; +`endif + // Register data read from Wishbone interface + wb_data_m <= d_dat_i; + // Don't set when stores complete - otherwise we'll deadlock if load in m stage + wb_load_complete <= !d_we_o; + end + // synthesis translate_off + if (d_err_i == `TRUE) + $display ("Data bus error. Address: %x", d_adr_o); + // synthesis translate_on + end + else + begin +`ifdef CFG_DCACHE_ENABLED + if (dcache_refill_request == `TRUE) + begin + // Start cache refill + d_adr_o <= first_address; + d_cyc_o <= `TRUE; + d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}}; + d_stb_o <= `TRUE; + d_we_o <= `FALSE; + d_cti_o <= first_cycle_type; + //d_lock_o <= `TRUE; + end + else +`endif + if ( (store_q_m == `TRUE) + && (stall_m == `FALSE) +`ifdef CFG_DRAM_ENABLED + && (dram_select_m == `FALSE) +`endif +`ifdef CFG_IROM_ENABLED + && (irom_select_m == `FALSE) +`endif + ) + begin + // Data cache is write through, so all stores go to memory + d_dat_o <= store_data_m; + d_adr_o <= load_store_address_m; + d_cyc_o <= `TRUE; + d_sel_o <= byte_enable_m; + d_stb_o <= `TRUE; + d_we_o <= `TRUE; + d_cti_o <= `LM32_CTYPE_END; + end + else if ( (load_q_m == `TRUE) + && (wb_select_m == `TRUE) + && (wb_load_complete == `FALSE) + // stall_m will be TRUE, because stall_wb_load will be TRUE + ) + begin + // Read requested address + stall_wb_load <= `FALSE; + d_adr_o <= load_store_address_m; + d_cyc_o <= `TRUE; + d_sel_o <= byte_enable_m; + d_stb_o <= 
`TRUE; + d_we_o <= `FALSE; + d_cti_o <= `LM32_CTYPE_END; + end + end + // Clear load/store complete flag when instruction leaves M stage + if (stall_m == `FALSE) + wb_load_complete <= `FALSE; + // When a Wishbone load first enters the M stage, we need to stall it + if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE)) + stall_wb_load <= `TRUE; + // Clear stall request if load instruction is killed + if ((kill_m == `TRUE) || (exception_m == `TRUE)) + stall_wb_load <= `FALSE; + end +end + +// Pipeline registers + +// X/M stage pipeline registers +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + sign_extend_m <= `FALSE; + size_m <= 2'b00; + byte_enable_m <= `FALSE; + store_data_m <= {`LM32_WORD_WIDTH{1'b0}}; +`ifdef CFG_DCACHE_ENABLED + dcache_select_m <= `FALSE; +`endif +`ifdef CFG_DRAM_ENABLED + dram_select_m <= `FALSE; +`endif +`ifdef CFG_IROM_ENABLED + irom_select_m <= `FALSE; +`endif + wb_select_m <= `FALSE; + end + else + begin + if (stall_m == `FALSE) + begin + sign_extend_m <= sign_extend_x; + size_m <= size_x; + byte_enable_m <= byte_enable_x; + store_data_m <= store_data_x; +`ifdef CFG_DCACHE_ENABLED + dcache_select_m <= dcache_select_x; +`endif +`ifdef CFG_DRAM_ENABLED + dram_select_m <= dram_select_x; +`endif +`ifdef CFG_IROM_ENABLED + irom_select_m <= irom_select_x; +`endif + wb_select_m <= wb_select_x; + end + end +end + +// M/W stage pipeline registers +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + size_w <= 2'b00; + data_w <= {`LM32_WORD_WIDTH{1'b0}}; + sign_extend_w <= `FALSE; + end + else + begin + size_w <= size_m; + data_w <= data_m; + sign_extend_w <= sign_extend_m; + end +end + +///////////////////////////////////////////////////// +// Behavioural Logic +///////////////////////////////////////////////////// + +// synthesis translate_off + +// Check for non-aligned loads or stores +always @(posedge clk_i) +begin + if (((load_q_m == `TRUE) || (store_q_m 
== `TRUE)) && (stall_m == `FALSE)) + begin + if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0)) + $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time); + if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00)) + $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time); + end +end + +// synthesis translate_on + +endmodule diff --git a/verilog/lm32/lm32_logic_op.v b/verilog/lm32/lm32_logic_op.v new file mode 100644 index 00000000..7b1a20dd --- /dev/null +++ b/verilog/lm32/lm32_logic_op.v @@ -0,0 +1,97 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_logic_op.v +// Title : Logic operations (and / or / not etc) +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_logic_op ( + // ----- Inputs ------- + logic_op_x, + operand_0_x, + operand_1_x, + // ----- Outputs ------- + logic_result_x + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input [`LM32_LOGIC_OP_RNG] logic_op_x; +input [`LM32_WORD_RNG] operand_0_x; +input [`LM32_WORD_RNG] operand_1_x; + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [`LM32_WORD_RNG] logic_result_x; +reg [`LM32_WORD_RNG] logic_result_x; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +integer logic_idx; + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +always @(*) +begin + for(logic_idx = 0; logic_idx < `LM32_WORD_WIDTH; logic_idx = logic_idx + 1) + logic_result_x[logic_idx] = logic_op_x[{operand_1_x[logic_idx], 
operand_0_x[logic_idx]}]; +end + +endmodule + diff --git a/verilog/lm32/lm32_mc_arithmetic.v b/verilog/lm32/lm32_mc_arithmetic.v new file mode 100644 index 00000000..d476d556 --- /dev/null +++ b/verilog/lm32/lm32_mc_arithmetic.v @@ -0,0 +1,309 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. +// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm_mc_arithmetic.v +// Title : Multi-cycle arithmetic unit. 
+// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +`define LM32_MC_STATE_RNG 2:0 +`define LM32_MC_STATE_IDLE 3'b000 +`define LM32_MC_STATE_MULTIPLY 3'b001 +`define LM32_MC_STATE_MODULUS 3'b010 +`define LM32_MC_STATE_DIVIDE 3'b011 +`define LM32_MC_STATE_SHIFT_LEFT 3'b100 +`define LM32_MC_STATE_SHIFT_RIGHT 3'b101 + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_mc_arithmetic ( + // ----- Inputs ----- + clk_i, + rst_i, + stall_d, + kill_x, +`ifdef CFG_MC_DIVIDE_ENABLED + divide_d, + modulus_d, +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED + multiply_d, +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + shift_left_d, + shift_right_d, + sign_extend_d, +`endif + operand_0_d, + operand_1_d, + // ----- Outputs ----- + result_x, +`ifdef CFG_MC_DIVIDE_ENABLED + divide_by_zero_x, +`endif + stall_request_x + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset +input stall_d; // Stall instruction in D stage +input kill_x; // Kill instruction in X stage +`ifdef CFG_MC_DIVIDE_ENABLED +input divide_d; // Perform divide +input modulus_d; // Perform modulus +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED +input multiply_d; // Perform multiply +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +input shift_left_d; // Perform left shift +input shift_right_d; // Perform right shift +input sign_extend_d; // Whether to sign-extend (arithmetic) or zero-extend (logical) +`endif +input [`LM32_WORD_RNG] operand_0_d; // First operand: dividend, multiplicand, or value to shift +input [`LM32_WORD_RNG] operand_1_d; // Second operand: divisor, multiplier, or shift amount (bits [4:0]) + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output
[`LM32_WORD_RNG] result_x; // Result of operation +reg [`LM32_WORD_RNG] result_x; +`ifdef CFG_MC_DIVIDE_ENABLED +output divide_by_zero_x; // A divide by zero was attempted +reg divide_by_zero_x; +`endif +output stall_request_x; // Request to stall pipeline from X stage back +wire stall_request_x; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +reg [`LM32_WORD_RNG] p; // Partial remainder (divide/modulus) or partial product (multiply) +reg [`LM32_WORD_RNG] a; // Loaded from operand_0_d: dividend/quotient, multiplicand, or left-shift value +reg [`LM32_WORD_RNG] b; // Divisor or multiplier (from operand_1_d), or right-shift value (from operand_0_d) +`ifdef CFG_MC_DIVIDE_ENABLED +wire [32:0] t; // Trial subtraction result; t[32] set means the subtraction borrowed +`endif + +reg [`LM32_MC_STATE_RNG] state; // Current state of FSM +reg [5:0] cycles; // Number of cycles remaining in the operation + +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +reg sign_extend_x; // Whether to sign extend or zero extend right shifts +wire fill_value; // Value to fill with for right barrel-shifts +`endif + +///////////////////////////////////////////////////// +// Combinational logic +///////////////////////////////////////////////////// + +// Stall pipeline while any operation is being performed +assign stall_request_x = state != `LM32_MC_STATE_IDLE; + +`ifdef CFG_MC_DIVIDE_ENABLED +// Trial subtraction: shift the next bit of a into the partial remainder and subtract b +assign t = {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]} - b; +`endif + +`ifdef CFG_MC_BARREL_SHIFT_ENABLED +// Determine fill value for right shift - Sign bit for arithmetic shift, or zero for logical shift +assign fill_value = (sign_extend_x == `TRUE) & b[`LM32_WORD_WIDTH-1]; +`endif + +///////////////////////////////////////////////////// +// Sequential logic +///////////////////////////////////////////////////// + +// Multi-cycle operation state machine (divide/modulus, multiply and shifts, as configured) +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + cycles <= {6{1'b0}}; + p <= {`LM32_WORD_WIDTH{1'b0}}; + a <= {`LM32_WORD_WIDTH{1'b0}}; + b <= {`LM32_WORD_WIDTH{1'b0}}; +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + sign_extend_x <= 1'b0; +`endif +`ifdef CFG_MC_DIVIDE_ENABLED + divide_by_zero_x <= `FALSE; +`endif
+ result_x <= {`LM32_WORD_WIDTH{1'b0}}; + state <= `LM32_MC_STATE_IDLE; + end + else + begin +`ifdef CFG_MC_DIVIDE_ENABLED + divide_by_zero_x <= `FALSE; +`endif + case (state) + `LM32_MC_STATE_IDLE: + begin + if (stall_d == `FALSE) + begin + cycles <= `LM32_WORD_WIDTH; + p <= 32'b0; + a <= operand_0_d; + b <= operand_1_d; +`ifdef CFG_MC_DIVIDE_ENABLED + if (divide_d == `TRUE) + state <= `LM32_MC_STATE_DIVIDE; + if (modulus_d == `TRUE) + state <= `LM32_MC_STATE_MODULUS; +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED + if (multiply_d == `TRUE) + state <= `LM32_MC_STATE_MULTIPLY; +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + if (shift_left_d == `TRUE) + begin + state <= `LM32_MC_STATE_SHIFT_LEFT; + sign_extend_x <= sign_extend_d; + cycles <= operand_1_d[4:0]; + a <= operand_0_d; + b <= operand_0_d; + end + if (shift_right_d == `TRUE) + begin + state <= `LM32_MC_STATE_SHIFT_RIGHT; + sign_extend_x <= sign_extend_d; + cycles <= operand_1_d[4:0]; + a <= operand_0_d; + b <= operand_0_d; + end +`endif + end + end +`ifdef CFG_MC_DIVIDE_ENABLED + `LM32_MC_STATE_DIVIDE: + begin + if (t[32] == 1'b0) + begin + p <= t[31:0]; + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b1}; + end + else + begin + p <= {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]}; + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0}; + end + result_x <= a; + if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE)) + begin + // Check for divide by zero + divide_by_zero_x <= b == {`LM32_WORD_WIDTH{1'b0}}; + state <= `LM32_MC_STATE_IDLE; + end + cycles <= cycles - 1'b1; + end + `LM32_MC_STATE_MODULUS: + begin + if (t[32] == 1'b0) + begin + p <= t[31:0]; + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b1}; + end + else + begin + p <= {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]}; + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0}; + end + result_x <= p; + if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE)) + begin + // Check for divide by zero + divide_by_zero_x <= b == {`LM32_WORD_WIDTH{1'b0}}; + state <= `LM32_MC_STATE_IDLE; + end + cycles <= cycles - 1'b1; 
+ end +`endif +`ifdef CFG_MC_MULTIPLY_ENABLED + `LM32_MC_STATE_MULTIPLY: + begin + if (b[0] == 1'b1) + p <= p + a; + b <= {1'b0, b[`LM32_WORD_WIDTH-1:1]}; + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0}; + result_x <= p; + if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE)) + state <= `LM32_MC_STATE_IDLE; + cycles <= cycles - 1'b1; + end +`endif +`ifdef CFG_MC_BARREL_SHIFT_ENABLED + `LM32_MC_STATE_SHIFT_LEFT: + begin + a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0}; + result_x <= a; + if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE)) + state <= `LM32_MC_STATE_IDLE; + cycles <= cycles - 1'b1; + end + `LM32_MC_STATE_SHIFT_RIGHT: + begin + b <= {fill_value, b[`LM32_WORD_WIDTH-1:1]}; + result_x <= b; + if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE)) + state <= `LM32_MC_STATE_IDLE; + cycles <= cycles - 1'b1; + end +`endif + endcase + end +end + +endmodule diff --git a/verilog/lm32/lm32_multiplier.v b/verilog/lm32/lm32_multiplier.v new file mode 100644 index 00000000..d68d1503 --- /dev/null +++ b/verilog/lm32/lm32_multiplier.v @@ -0,0 +1,120 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_multiplier.v +// Title : Pipelined multiplier. +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_multiplier ( + // ----- Inputs ----- + clk_i, + rst_i, + stall_x, + stall_m, + operand_0, + operand_1, + // ----- Outputs ----- + result + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset +input stall_x; // Stall instruction in X stage +input stall_m; // Stall instruction in M stage +input [`LM32_WORD_RNG] operand_0; // Multiplicand +input [`LM32_WORD_RNG] operand_1; // Multiplier + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [`LM32_WORD_RNG] result; // Product of multiplication (registered) +reg [`LM32_WORD_RNG] result; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +reg [`LM32_WORD_RNG] muliplicand; // operand_0, captured while the X stage is not stalled +reg [`LM32_WORD_RNG] multiplier; // operand_1, captured while the X stage is not stalled +reg [`LM32_WORD_RNG] product; // muliplicand * multiplier, captured while the M stage is not stalled + +///////////////////////////////////////////////////// +// Sequential logic - three register steps: operands, product, result 
+///////////////////////////////////////////////////// + +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + muliplicand <= {`LM32_WORD_WIDTH{1'b0}}; + multiplier <= {`LM32_WORD_WIDTH{1'b0}}; + product <= {`LM32_WORD_WIDTH{1'b0}}; + result <= {`LM32_WORD_WIDTH{1'b0}}; + end + else + begin + if (stall_x == `FALSE) + begin + muliplicand <= operand_0; + multiplier <= operand_1; + end + if (stall_m == `FALSE) + product <= muliplicand * multiplier; + result <= product; // Not gated by either stall: follows product on every clock + end +end + +endmodule diff --git a/verilog/lm32/lm32_multiplier_spartan6.v b/verilog/lm32/lm32_multiplier_spartan6.v new file mode 100644 index 00000000..eb25754c --- /dev/null +++ b/verilog/lm32/lm32_multiplier_spartan6.v @@ -0,0 +1,193 @@ +/* + * Milkymist SoC + * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +module lm32_multiplier( + input clk_i, + input rst_i, + input stall_x, + input stall_m, + input [31:0] operand_0, + input [31:0] operand_1, + output [31:0] result +); + +// See UG389, esp. p. 
29 "Fully Pipelined, 35 x 35 Multiplier Use Model (Large Multiplier)" + +wire [17:0] au = {3'd0, operand_0[31:17]}; // upper 15 bits of operand_0, zero-extended to 18 +wire [17:0] al = {1'b0, operand_0[16:0]}; // lower 17 bits of operand_0, zero-extended to 18 +wire [17:0] bu = {3'd0, operand_1[31:17]}; // upper 15 bits of operand_1, zero-extended to 18 +wire [17:0] bl = {1'b0, operand_1[16:0]}; // lower 17 bits of operand_1, zero-extended to 18 + +wire [17:0] bl_forward; +wire [35:0] al_bl; + +reg [16:0] result_low; // al_bl[16:0] delayed by one clock; drives result[16:0] +always @(posedge clk_i) begin + if(rst_i) + result_low <= 17'd0; + else + result_low <= al_bl[16:0]; +end +assign result[16:0] = result_low; + +DSP48A1 #( + .A0REG(1), + .A1REG(0), + .B0REG(1), + .B1REG(0), + .CARRYINREG(0), + .CARRYINSEL("OPMODE5"), + .CARRYOUTREG(0), + .CREG(0), + .DREG(0), + .MREG(1), + .OPMODEREG(0), + .PREG(0), + .RSTTYPE("SYNC") +) D1 ( // A=al, B=bl: product taken from M as al_bl; BCOUT feeds D2 as bl_forward + .BCOUT(bl_forward), + .PCOUT(), + .CARRYOUT(), + .CARRYOUTF(), + .M(al_bl), + .P(), + .PCIN(), + .CLK(clk_i), + .OPMODE(8'd1), + .A(al), + .B(bl), + .C(), + .CARRYIN(), + .D(), + .CEA(~stall_x), + .CEB(~stall_x), + .CEC(), + .CECARRYIN(), + .CED(), + .CEM(~stall_m), + .CEOPMODE(), + .CEP(1'b1), + .RSTA(rst_i), + .RSTB(rst_i), + .RSTC(), + .RSTCARRYIN(), + .RSTD(), + .RSTM(rst_i), + .RSTOPMODE(), + .RSTP() +); + +wire [47:0] au_bl_sum; + +DSP48A1 #( + .A0REG(1), + .A1REG(0), + .B0REG(0), + .B1REG(0), + .CARRYINREG(0), + .CARRYINSEL("OPMODE5"), + .CARRYOUTREG(0), + .CREG(0), + .DREG(0), + .MREG(1), + .OPMODEREG(0), + .PREG(0), + .RSTTYPE("SYNC") +) D2 ( // A=au, B=bl_forward, C=al_bl[33:17]; result leaves on PCOUT as au_bl_sum (OPMODE decoding per UG389 - confirm) + .BCOUT(), + .PCOUT(au_bl_sum), + .CARRYOUT(), + .CARRYOUTF(), + .M(), + .P(), + .PCIN(), + .CLK(clk_i), + .OPMODE(8'd13), + .A(au), + .B(bl_forward), + .C({31'd0, al_bl[33:17]}), + .CARRYIN(), + .D(), + .CEA(~stall_x), + .CEB(), + .CEC(), + .CECARRYIN(), + .CED(), + .CEM(~stall_m), + .CEOPMODE(), + .CEP(), + .RSTA(rst_i), + .RSTB(), + .RSTC(), + .RSTCARRYIN(), + .RSTD(), + .RSTM(rst_i), + .RSTOPMODE(), + .RSTP() +); + +wire [47:0] r_full; +assign result[31:17] = r_full[14:0]; // 15-bit slice for the 15-bit target (was r_full[16:0], whose top two bits were silently truncated) + +DSP48A1 #( + .A0REG(1), + .A1REG(0), + .B0REG(1), + .B1REG(0), + .CARRYINREG(0), + .CARRYINSEL("OPMODE5"), + .CARRYOUTREG(0), + .CREG(0), + .DREG(0), + .MREG(1), + 
.OPMODEREG(0), + .PREG(1), + .RSTTYPE("SYNC") +) D3 ( // A=bu, B=al, PCIN=au_bl_sum; P is registered (PREG=1) and read out as r_full + .BCOUT(), + .PCOUT(), + .CARRYOUT(), + .CARRYOUTF(), + .M(), + .P(r_full), + .PCIN(au_bl_sum), + .CLK(clk_i), + .OPMODE(8'd5), + .A(bu), + .B(al), + .C(), + .CARRYIN(), + .D(), + .CEA(~stall_x), + .CEB(~stall_x), + .CEC(), + .CECARRYIN(), + .CED(), + .CEM(~stall_m), + .CEOPMODE(), + .CEP(1'b1), + .RSTA(rst_i), + .RSTB(rst_i), + .RSTC(), + .RSTCARRYIN(), + .RSTD(), + .RSTM(rst_i), + .RSTOPMODE(), + .RSTP(rst_i) +); + +endmodule diff --git a/verilog/lm32/lm32_ram.v b/verilog/lm32/lm32_ram.v new file mode 100644 index 00000000..d84352f9 --- /dev/null +++ b/verilog/lm32/lm32_ram.v @@ -0,0 +1,128 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_ram.v +// Title : Pseudo dual-port RAM. +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : Options added to select EBRs (True-DP, Psuedo-DP, DQ, or +// : Distributed RAM). +// Version : 3.2 +// : EBRs use SYNC resets instead of ASYNC resets. +// Version : 3.5 +// : Added read-after-write hazard resolution when using true +// : dual-port EBRs +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_ram + ( + // ----- Inputs ------- + read_clk, + write_clk, + reset, + enable_read, + read_address, + enable_write, + write_address, + write_data, + write_enable, + // ----- Outputs ------- + read_data + ); + +/*---------------------------------------------------------------------- + Parameters + ----------------------------------------------------------------------*/ +parameter data_width = 1; // Width of the data ports +parameter address_width = 1; // Width of the address ports + +/*---------------------------------------------------------------------- + Inputs + ----------------------------------------------------------------------*/ +input read_clk; // Read clock +input write_clk; // Write clock +input reset; // Reset + +input enable_read; // Access enable +input [address_width-1:0] read_address; // Read/write address +input enable_write; // Access 
enable +input [address_width-1:0] write_address;// Read/write address +input [data_width-1:0] write_data; // Data to write to specified address +input write_enable; // Write enable + +/*---------------------------------------------------------------------- + Outputs + ----------------------------------------------------------------------*/ +output [data_width-1:0] read_data; // Data read from specified addess +wire [data_width-1:0] read_data; + +/*---------------------------------------------------------------------- + Internal nets and registers + ----------------------------------------------------------------------*/ +reg [data_width-1:0] mem[0:(1<>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_shifter.v +// Title : Barrel shifter +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : Initial Release +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface +///////////////////////////////////////////////////// + +module lm32_shifter ( + // ----- Inputs ------- + clk_i, + rst_i, + stall_x, + direction_x, + sign_extend_x, + operand_0_x, + operand_1_x, + // ----- Outputs ------- + shifter_result_m + ); + +///////////////////////////////////////////////////// +// Inputs +///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset +input stall_x; // Stall instruction in X stage +input direction_x; // Direction to shift +input sign_extend_x; // Whether shift is arithmetic (1'b1) or logical (1'b0) +input [`LM32_WORD_RNG] operand_0_x; // Operand to shift +input [`LM32_WORD_RNG] operand_1_x; // Operand that specifies how many bits to shift by + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +output [`LM32_WORD_RNG] shifter_result_m; // Result of shift +wire [`LM32_WORD_RNG] shifter_result_m; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +reg direction_m; // direction_x, registered together with right_shift_result +reg 
[`LM32_WORD_RNG] left_shift_result; // right_shift_result with its bit order reversed +reg [`LM32_WORD_RNG] right_shift_result; // Registered output of the single right shifter +reg [`LM32_WORD_RNG] left_shift_operand; // operand_0_x with its bit order reversed +wire [`LM32_WORD_RNG] right_shift_operand; +wire fill_value; +wire [`LM32_WORD_RNG] right_shift_in; + +integer shift_idx_0; +integer shift_idx_1; + +///////////////////////////////////////////////////// +// Combinational Logic +///////////////////////////////////////////////////// + +// Select operands - To perform a left shift, we reverse the bits and perform a right shift +always @(*) +begin + for (shift_idx_0 = 0; shift_idx_0 < `LM32_WORD_WIDTH; shift_idx_0 = shift_idx_0 + 1) + left_shift_operand[`LM32_WORD_WIDTH-1-shift_idx_0] = operand_0_x[shift_idx_0]; +end +assign right_shift_operand = direction_x == `LM32_SHIFT_OP_LEFT ? left_shift_operand : operand_0_x; + +// Determine fill value for right shift - Sign bit for arithmetic shift, or zero for logical shift +assign fill_value = (sign_extend_x == `TRUE) && (direction_x == `LM32_SHIFT_OP_RIGHT) + ? operand_0_x[`LM32_WORD_WIDTH-1] + : 1'b0; + +// Bits shifted in from the top during the right shift: a full word of fill_value +assign right_shift_in = {`LM32_WORD_WIDTH{fill_value}}; + +// Reverse bits to get left shift result +always @(*) +begin + for (shift_idx_1 = 0; shift_idx_1 < `LM32_WORD_WIDTH; shift_idx_1 = shift_idx_1 + 1) + left_shift_result[`LM32_WORD_WIDTH-1-shift_idx_1] = right_shift_result[shift_idx_1]; +end + +// Select result - uses the registered direction so it matches the registered shift result +assign shifter_result_m = direction_m == `LM32_SHIFT_OP_LEFT ? 
left_shift_result : right_shift_result; + +///////////////////////////////////////////////////// +// Sequential Logic +///////////////////////////////////////////////////// + +// Perform right shift - both registers hold their value while the X stage is stalled +always @(posedge clk_i `CFG_RESET_SENSITIVITY) +begin + if (rst_i == `TRUE) + begin + right_shift_result <= {`LM32_WORD_WIDTH{1'b0}}; + direction_m <= `FALSE; + end + else + begin + if (stall_x == `FALSE) + begin + right_shift_result <= {right_shift_in, right_shift_operand} >> operand_1_x[`LM32_SHIFT_RNG]; // Double-width shift; the assignment keeps the low word + direction_m <= direction_x; + end + end +end + +endmodule diff --git a/verilog/lm32/lm32_top.v b/verilog/lm32/lm32_top.v new file mode 100644 index 00000000..c03e280b --- /dev/null +++ b/verilog/lm32/lm32_top.v @@ -0,0 +1,354 @@ +// ================================================================== +// >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<< +// ------------------------------------------------------------------ +// Copyright (c) 2006-2011 by Lattice Semiconductor Corporation +// ALL RIGHTS RESERVED +// ------------------------------------------------------------------ +// +// IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM. +// +// Permission: +// +// Lattice Semiconductor grants permission to use this code +// pursuant to the terms of the Lattice Semiconductor Corporation +// Open Source License Agreement. +// +// Disclaimer: +// +// Lattice Semiconductor provides no warranty regarding the use or +// functionality of this code. It is the user's responsibility to +// verify the user's design for consistency and functionality through +// the use of formal verification methods. 
+// +// -------------------------------------------------------------------- +// +// Lattice Semiconductor Corporation +// 5555 NE Moore Court +// Hillsboro, OR 97214 +// U.S.A +// +// TEL: 1-800-Lattice (USA and Canada) +// 503-286-8001 (other locations) +// +// web: http://www.latticesemi.com/ +// email: techsupport@latticesemi.com +// +// -------------------------------------------------------------------- +// FILE DETAILS +// Project : LatticeMico32 +// File : lm32_top.v +// Title : Top-level of CPU. +// Dependencies : lm32_include.v +// Version : 6.1.17 +// : removed SPI - 04/12/07 +// Version : 7.0SP2, 3.0 +// : No Change +// Version : 3.1 +// : No Change +// ============================================================================= + +`include "lm32_include.v" + +///////////////////////////////////////////////////// +// Module interface - wraps lm32_cpu and, when CFG_JTAG_ENABLED, jtag_cores +///////////////////////////////////////////////////// + +module lm32_top ( + // ----- Inputs ------- + clk_i, + rst_i, +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + at_debug, + `endif +`endif + // From external devices +`ifdef CFG_INTERRUPTS_ENABLED + interrupt, +`endif + // From user logic +`ifdef CFG_USER_ENABLED + user_result, + user_complete, +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + I_DAT_I, + I_ACK_I, + I_ERR_I, + I_RTY_I, +`endif +`ifdef CFG_EXTERNAL_BREAK_ENABLED + ext_break, +`endif + // Data Wishbone master + D_DAT_I, + D_ACK_I, + D_ERR_I, + D_RTY_I, + // ----- Outputs ------- +`ifdef CFG_USER_ENABLED + user_valid, + user_opcode, + user_operand_0, + user_operand_1, +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + I_DAT_O, + I_ADR_O, + I_CYC_O, + I_SEL_O, + I_STB_O, + I_WE_O, + I_CTI_O, + I_LOCK_O, + I_BTE_O, +`endif + // Data Wishbone master + D_DAT_O, + D_ADR_O, + D_CYC_O, + D_SEL_O, + D_STB_O, + D_WE_O, + D_CTI_O, + D_LOCK_O, + D_BTE_O + ); + +///////////////////////////////////////////////////// +// Inputs 
+///////////////////////////////////////////////////// + +input clk_i; // Clock +input rst_i; // Reset + +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + input at_debug; // GPIO input that maps EBA to DEBA + `endif +`endif + +`ifdef CFG_INTERRUPTS_ENABLED +input [`LM32_INTERRUPT_RNG] interrupt; // Interrupt pins +`endif + +`ifdef CFG_USER_ENABLED +input [`LM32_WORD_RNG] user_result; // User-defined instruction result +input user_complete; // Indicates the user-defined instruction result is valid +`endif + +`ifdef CFG_IWB_ENABLED +input [`LM32_WORD_RNG] I_DAT_I; // Instruction Wishbone interface read data +input I_ACK_I; // Instruction Wishbone interface acknowledgement +input I_ERR_I; // Instruction Wishbone interface error +input I_RTY_I; // Instruction Wishbone interface retry +`endif + +input [`LM32_WORD_RNG] D_DAT_I; // Data Wishbone interface read data +input D_ACK_I; // Data Wishbone interface acknowledgement +input D_ERR_I; // Data Wishbone interface error +input D_RTY_I; // Data Wishbone interface retry + +`ifdef CFG_EXTERNAL_BREAK_ENABLED +input ext_break; +`endif + +///////////////////////////////////////////////////// +// Outputs +///////////////////////////////////////////////////// + +`ifdef CFG_USER_ENABLED +output user_valid; // Indicates that user_opcode and user_operand_* are valid +wire user_valid; +output [`LM32_USER_OPCODE_RNG] user_opcode; // User-defined instruction opcode +wire [`LM32_USER_OPCODE_RNG] user_opcode; +output [`LM32_WORD_RNG] user_operand_0; // First operand for user-defined instruction +wire [`LM32_WORD_RNG] user_operand_0; +output [`LM32_WORD_RNG] user_operand_1; // Second operand for user-defined instruction +wire [`LM32_WORD_RNG] user_operand_1; +`endif + +`ifdef CFG_IWB_ENABLED +output [`LM32_WORD_RNG] I_DAT_O; // Instruction Wishbone interface write data +wire [`LM32_WORD_RNG] I_DAT_O; +output [`LM32_WORD_RNG] I_ADR_O; // Instruction Wishbone interface address +wire [`LM32_WORD_RNG] I_ADR_O; +output I_CYC_O; // 
Instruction Wishbone interface cycle +wire I_CYC_O; +output [`LM32_BYTE_SELECT_RNG] I_SEL_O; // Instruction Wishbone interface byte select +wire [`LM32_BYTE_SELECT_RNG] I_SEL_O; +output I_STB_O; // Instruction Wishbone interface strobe +wire I_STB_O; +output I_WE_O; // Instruction Wishbone interface write enable +wire I_WE_O; +output [`LM32_CTYPE_RNG] I_CTI_O; // Instruction Wishbone interface cycle type +wire [`LM32_CTYPE_RNG] I_CTI_O; +output I_LOCK_O; // Instruction Wishbone interface lock bus +wire I_LOCK_O; +output [`LM32_BTYPE_RNG] I_BTE_O; // Instruction Wishbone interface burst type +wire [`LM32_BTYPE_RNG] I_BTE_O; +`endif + +output [`LM32_WORD_RNG] D_DAT_O; // Data Wishbone interface write data +wire [`LM32_WORD_RNG] D_DAT_O; +output [`LM32_WORD_RNG] D_ADR_O; // Data Wishbone interface address +wire [`LM32_WORD_RNG] D_ADR_O; +output D_CYC_O; // Data Wishbone interface cycle +wire D_CYC_O; +output [`LM32_BYTE_SELECT_RNG] D_SEL_O; // Data Wishbone interface byte select +wire [`LM32_BYTE_SELECT_RNG] D_SEL_O; +output D_STB_O; // Data Wishbone interface strobe +wire D_STB_O; +output D_WE_O; // Data Wishbone interface write enable +wire D_WE_O; +output [`LM32_CTYPE_RNG] D_CTI_O; // Data Wishbone interface cycle type +wire [`LM32_CTYPE_RNG] D_CTI_O; +output D_LOCK_O; // Data Wishbone interface lock bus +wire D_LOCK_O; +output [`LM32_BTYPE_RNG] D_BTE_O; // Data Wishbone interface burst type +wire [`LM32_BTYPE_RNG] D_BTE_O; + +///////////////////////////////////////////////////// +// Internal nets and registers +///////////////////////////////////////////////////// + +`ifdef CFG_JTAG_ENABLED +// Signals between JTAG interface and CPU +wire [`LM32_BYTE_RNG] jtag_reg_d; +wire [`LM32_BYTE_RNG] jtag_reg_q; +wire jtag_update; +wire [2:0] jtag_reg_addr_d; +wire [2:0] jtag_reg_addr_q; +wire jtck; +wire jrstn; +`endif + +// TODO: get the trace signals out +`ifdef CFG_TRACE_ENABLED +// PC trace signals +wire [`LM32_PC_RNG] trace_pc; // PC to trace (address of next 
non-sequential instruction) +wire trace_pc_valid; // Indicates that a new trace PC is valid +wire trace_exception; // Indicates an exception has occurred +wire [`LM32_EID_RNG] trace_eid; // Indicates what type of exception has occurred +wire trace_eret; // Indicates an eret instruction has been executed +`ifdef CFG_DEBUG_ENABLED +wire trace_bret; // Indicates a bret instruction has been executed +`endif +`endif + +///////////////////////////////////////////////////// +// Functions +///////////////////////////////////////////////////// + +`include "lm32_functions.v" +///////////////////////////////////////////////////// +// Instantiations +///////////////////////////////////////////////////// + +// LM32 CPU +lm32_cpu cpu ( + // ----- Inputs ------- + .clk_i (clk_i), +`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE + .clk_n_i (clk_n), // NOTE(review): clk_n is neither a port nor a declared net of this module - confirm where it should come from +`endif + .rst_i (rst_i), +`ifdef CFG_DEBUG_ENABLED + `ifdef CFG_ALTERNATE_EBA + .at_debug (at_debug), + `endif +`endif + // From external devices +`ifdef CFG_INTERRUPTS_ENABLED + .interrupt (interrupt), +`endif + // From user logic +`ifdef CFG_USER_ENABLED + .user_result (user_result), + .user_complete (user_complete), +`endif +`ifdef CFG_JTAG_ENABLED + // From JTAG + .jtag_clk (jtck), + .jtag_update (jtag_update), + .jtag_reg_q (jtag_reg_q), + .jtag_reg_addr_q (jtag_reg_addr_q), +`endif +`ifdef CFG_EXTERNAL_BREAK_ENABLED + .ext_break (ext_break), +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + .I_DAT_I (I_DAT_I), + .I_ACK_I (I_ACK_I), + .I_ERR_I (I_ERR_I), + .I_RTY_I (I_RTY_I), +`endif + // Data Wishbone master + .D_DAT_I (D_DAT_I), + .D_ACK_I (D_ACK_I), + .D_ERR_I (D_ERR_I), + .D_RTY_I (D_RTY_I), + // ----- Outputs ------- +`ifdef CFG_TRACE_ENABLED + .trace_pc (trace_pc), + .trace_pc_valid (trace_pc_valid), + .trace_exception (trace_exception), + .trace_eid (trace_eid), + .trace_eret (trace_eret), +`ifdef CFG_DEBUG_ENABLED + .trace_bret (trace_bret), +`endif +`endif +`ifdef CFG_JTAG_ENABLED + .jtag_reg_d (jtag_reg_d), + 
.jtag_reg_addr_d (jtag_reg_addr_d), +`endif +`ifdef CFG_USER_ENABLED + .user_valid (user_valid), + .user_opcode (user_opcode), + .user_operand_0 (user_operand_0), + .user_operand_1 (user_operand_1), +`endif +`ifdef CFG_IWB_ENABLED + // Instruction Wishbone master + .I_DAT_O (I_DAT_O), + .I_ADR_O (I_ADR_O), + .I_CYC_O (I_CYC_O), + .I_SEL_O (I_SEL_O), + .I_STB_O (I_STB_O), + .I_WE_O (I_WE_O), + .I_CTI_O (I_CTI_O), + .I_LOCK_O (I_LOCK_O), + .I_BTE_O (I_BTE_O), + `endif + // Data Wishbone master + .D_DAT_O (D_DAT_O), + .D_ADR_O (D_ADR_O), + .D_CYC_O (D_CYC_O), + .D_SEL_O (D_SEL_O), + .D_STB_O (D_STB_O), + .D_WE_O (D_WE_O), + .D_CTI_O (D_CTI_O), + .D_LOCK_O (D_LOCK_O), + .D_BTE_O (D_BTE_O) + ); + +`ifdef CFG_JTAG_ENABLED +// JTAG cores +jtag_cores jtag_cores ( + // ----- Inputs ----- + .reg_d (jtag_reg_d), + .reg_addr_d (jtag_reg_addr_d), + // ----- Outputs ----- + .reg_update (jtag_update), + .reg_q (jtag_reg_q), + .reg_addr_q (jtag_reg_addr_q), + .jtck (jtck), + .jrstn (jrstn) + ); +`endif + +endmodule diff --git a/verilog/uart/uart.v b/verilog/uart/uart.v new file mode 100644 index 00000000..6412804c --- /dev/null +++ b/verilog/uart/uart.v @@ -0,0 +1,142 @@ +/* + * Milkymist SoC + * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +module uart #( + parameter csr_addr = 4'h0, + parameter clk_freq = 100000000, + parameter baud = 115200, + parameter break_en_default = 1'b0 +) ( + input sys_clk, + input sys_rst, + + input [13:0] csr_a, + input csr_we, + input [31:0] csr_di, + output reg [31:0] csr_do, + + output irq, + + input uart_rx, + output uart_tx, + + output break +); + +reg [15:0] divisor; +wire [7:0] rx_data; +wire [7:0] tx_data; +wire tx_wr; + +wire uart_tx_transceiver, rx_done, tx_done, break_transceiver; /* declared explicitly instead of relying on implicit nets */ + +reg thru_en; /* when set, uart_rx is passed straight through to uart_tx */ +reg break_en; /* gates the break output; cleared once break fires */ +reg tx_irq_en; +reg rx_irq_en; +reg rx_event; /* set on rx_done, cleared by writing 1 to bit 1 of register 3'b010 */ +reg tx_event; /* set on tx_done, cleared by writing 1 to bit 2 of register 3'b010 */ +reg thre; /* cleared on a TX write, set again on tx_done */ + +uart_transceiver transceiver( + .sys_clk(sys_clk), + .sys_rst(sys_rst), + + .uart_rx(uart_rx), + .uart_tx(uart_tx_transceiver), + + .divisor(divisor), + + .rx_data(rx_data), + .rx_done(rx_done), + + .tx_data(tx_data), + .tx_wr(tx_wr), + .tx_done(tx_done), + + .break(break_transceiver) +); + +assign uart_tx = thru_en ? uart_rx : uart_tx_transceiver; +assign break = break_en & break_transceiver; + +/* CSR interface */ +wire csr_selected = csr_a[13:10] == csr_addr; + +assign irq = (tx_event & tx_irq_en) | (rx_event & rx_irq_en); + +assign tx_data = csr_di[7:0]; +assign tx_wr = csr_selected & csr_we & (csr_a[2:0] == 3'b000); /* a write to register 0 starts a transmission */ + +parameter default_divisor = clk_freq/baud/16; + +always @(posedge sys_clk) begin + if(sys_rst) begin + divisor <= default_divisor; + csr_do <= 32'd0; + thru_en <= 1'b0; + break_en <= break_en_default; + rx_irq_en <= 1'b0; + tx_irq_en <= 1'b0; + tx_event <= 1'b0; + rx_event <= 1'b0; + thre <= 1'b1; + end else begin + csr_do <= 32'd0; /* default; overridden below when this core is selected */ + if(break) + break_en <= 1'b0; + if(tx_done) begin + tx_event <= 1'b1; + thre <= 1'b1; + end + if(tx_wr) + thre <= 1'b0; + if(rx_done) begin + rx_event <= 1'b1; + end + if(csr_selected) begin + case(csr_a[2:0]) + 3'b000: csr_do <= rx_data; + 3'b001: csr_do <= divisor; + 3'b010: csr_do <= {tx_event, rx_event, thre}; + 3'b011: csr_do <= {thru_en, tx_irq_en, rx_irq_en}; + 3'b100: csr_do <= {break_en}; + endcase + if(csr_we) begin + case(csr_a[2:0]) + 3'b000:; /* 
handled by transceiver */ + 3'b001: divisor <= csr_di[15:0]; + 3'b010: begin + /* write one to clear */ + if(csr_di[1]) + rx_event <= 1'b0; + if(csr_di[2]) + tx_event <= 1'b0; + end + 3'b011: begin + rx_irq_en <= csr_di[0]; + tx_irq_en <= csr_di[1]; + thru_en <= csr_di[2]; + end + 3'b100: break_en <= csr_di[0]; + endcase + end + end + end +end + +endmodule diff --git a/verilog/uart/uart_transceiver.v b/verilog/uart/uart_transceiver.v new file mode 100644 index 00000000..80bd93bf --- /dev/null +++ b/verilog/uart/uart_transceiver.v @@ -0,0 +1,165 @@ +/* + * Milkymist SoC + * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq + * Copyright (C) 2007 Das Labor + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +module uart_transceiver( + input sys_rst, + input sys_clk, + + input uart_rx, + output reg uart_tx, + + input [15:0] divisor, + + output reg [7:0] rx_data, + output reg rx_done, + + input [7:0] tx_data, + input tx_wr, + output reg tx_done, + + output reg break +); + +//----------------------------------------------------------------- +// enable16 generator: one-cycle tick each time the down-counter, reloaded with divisor-1, reaches zero +//----------------------------------------------------------------- +reg [15:0] enable16_counter; + +wire enable16; +assign enable16 = (enable16_counter == 16'd0); + +always @(posedge sys_clk) begin + if(sys_rst) + enable16_counter <= divisor - 16'b1; + else begin + enable16_counter <= enable16_counter - 16'd1; + if(enable16) + enable16_counter <= divisor - 16'b1; // later assignment wins: reload instead of decrement + end +end + +//----------------------------------------------------------------- +// Synchronize uart_rx (two flip-flops, not reset) +//----------------------------------------------------------------- +reg uart_rx1; +reg uart_rx2; + +always @(posedge sys_clk) begin + uart_rx1 <= uart_rx; + uart_rx2 <= uart_rx1; +end + +//----------------------------------------------------------------- +// UART RX Logic +//----------------------------------------------------------------- +reg rx_busy; +reg uart_rx_r; // uart_rx2 as seen at the previous enable16 tick, for edge detection +reg [3:0] rx_count16; +reg [3:0] rx_bitcount; +reg [7:0] rx_reg; + +always @(posedge sys_clk) begin + if(sys_rst) begin + rx_done <= 1'b0; + rx_busy <= 1'b0; + rx_count16 <= 4'd0; + rx_bitcount <= 4'd0; + break <= 1'b0; + uart_rx_r <= 1'b0; + end else begin + rx_done <= 1'b0; // rx_done and break are single-cycle pulses + break <= 1'b0; + + if(enable16) begin + uart_rx_r <= uart_rx2; + if(~rx_busy) begin // look for start bit + if(~uart_rx2 & uart_rx_r) begin // start bit found + rx_busy <= 1'b1; + rx_count16 <= 4'd7; // preload so the 4-bit counter first wraps to 0 (the sample point) part-way into the start bit + rx_bitcount <= 4'd0; + end + end else begin + rx_count16 <= rx_count16 + 4'd1; + + if(rx_count16 == 4'd0) begin // sample + rx_bitcount <= rx_bitcount + 4'd1; + + if(rx_bitcount == 4'd0) begin // verify startbit + if(uart_rx2) + rx_busy <= 1'b0; + end else if(rx_bitcount == 4'd9) begin + rx_busy <= 
1'b0; + if(uart_rx2) begin // stop bit ok + rx_data <= rx_reg; + rx_done <= 1'b1; + end else if(rx_reg == 8'h00) // break condition + break <= 1'b1; + end else + rx_reg <= {uart_rx2, rx_reg[7:1]}; // shift in at the top: the first data bit received ends up in bit 0 + end + end + end + end +end + +//----------------------------------------------------------------- +// UART TX Logic +//----------------------------------------------------------------- +reg tx_busy; +reg [3:0] tx_bitcount; +reg [3:0] tx_count16; +reg [7:0] tx_reg; + +always @(posedge sys_clk) begin + if(sys_rst) begin + tx_done <= 1'b0; + tx_busy <= 1'b0; + uart_tx <= 1'b1; + end else begin + tx_done <= 1'b0; // single-cycle pulse + if(tx_wr) begin + tx_reg <= tx_data; + tx_bitcount <= 4'd0; + tx_count16 <= 4'd1; + tx_busy <= 1'b1; + uart_tx <= 1'b0; // start bit is driven immediately, without waiting for enable16 +`ifdef SIMULATION + $display("UART: %c", tx_data); +`endif + end else if(enable16 && tx_busy) begin + tx_count16 <= tx_count16 + 4'd1; + + if(tx_count16 == 4'd0) begin + tx_bitcount <= tx_bitcount + 4'd1; + + if(tx_bitcount == 4'd8) begin + uart_tx <= 1'b1; + end else if(tx_bitcount == 4'd9) begin + uart_tx <= 1'b1; + tx_busy <= 1'b0; + tx_done <= 1'b1; + end else begin + uart_tx <= tx_reg[0]; // least significant bit goes out first + tx_reg <= {1'b0, tx_reg[7:1]}; + end + end + end + end +end + +endmodule -- 2.30.2