Initial import
author Sebastien Bourdeauducq <sebastien@milkymist.org>
Tue, 13 Dec 2011 16:33:12 +0000 (17:33 +0100)
committer Sebastien Bourdeauducq <sebastien@milkymist.org>
Tue, 13 Dec 2011 16:33:12 +0000 (17:33 +0100)
37 files changed:
.gitignore [new file with mode: 0644]
build.py [new file with mode: 0644]
build/.keep_me [new file with mode: 0644]
constraints.py [new file with mode: 0644]
milkymist/__init__.py [new file with mode: 0644]
milkymist/lm32/__init__.py [new file with mode: 0644]
milkymist/norflash/__init__.py [new file with mode: 0644]
milkymist/uart/__init__.py [new file with mode: 0644]
tb/norflash/Makefile [new file with mode: 0644]
tb/norflash/norflash_conv.py [new file with mode: 0644]
tb/norflash/tb_norflash.v [new file with mode: 0644]
top.py [new file with mode: 0644]
verilog/lm32/jtag_cores.v [new file with mode: 0644]
verilog/lm32/jtag_tap_spartan6.v [new file with mode: 0644]
verilog/lm32/lm32_adder.v [new file with mode: 0644]
verilog/lm32/lm32_addsub.v [new file with mode: 0644]
verilog/lm32/lm32_cpu.v [new file with mode: 0644]
verilog/lm32/lm32_dcache.v [new file with mode: 0644]
verilog/lm32/lm32_debug.v [new file with mode: 0644]
verilog/lm32/lm32_decoder.v [new file with mode: 0644]
verilog/lm32/lm32_dp_ram.v [new file with mode: 0644]
verilog/lm32/lm32_functions.v [new file with mode: 0644]
verilog/lm32/lm32_icache.v [new file with mode: 0644]
verilog/lm32/lm32_include.v [new file with mode: 0644]
verilog/lm32/lm32_instruction_unit.v [new file with mode: 0644]
verilog/lm32/lm32_interrupt.v [new file with mode: 0644]
verilog/lm32/lm32_jtag.v [new file with mode: 0644]
verilog/lm32/lm32_load_store_unit.v [new file with mode: 0644]
verilog/lm32/lm32_logic_op.v [new file with mode: 0644]
verilog/lm32/lm32_mc_arithmetic.v [new file with mode: 0644]
verilog/lm32/lm32_multiplier.v [new file with mode: 0644]
verilog/lm32/lm32_multiplier_spartan6.v [new file with mode: 0644]
verilog/lm32/lm32_ram.v [new file with mode: 0644]
verilog/lm32/lm32_shifter.v [new file with mode: 0644]
verilog/lm32/lm32_top.v [new file with mode: 0644]
verilog/uart/uart.v [new file with mode: 0644]
verilog/uart/uart_transceiver.v [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..597bc71
--- /dev/null
@@ -0,0 +1,2 @@
+__pycache__
+build/*
diff --git a/build.py b/build.py
new file mode 100644 (file)
index 0000000..0eb19cd
--- /dev/null
+++ b/build.py
@@ -0,0 +1,62 @@
+import os
+import top
+
+# list Verilog sources before changing directory
+verilog_sources = []
+def add_core_dir(d):
+       """Append every file found under verilog/<d> (recursively) to verilog_sources."""
+       core_root = os.path.join("verilog", d)
+       for dirpath, _subdirs, filenames in os.walk(core_root):
+               verilog_sources.extend(os.path.join(dirpath, name) for name in filenames)
+def add_core_files(d, files):
+       """Append the named files of verilog/<d> to verilog_sources, in the given order."""
+       verilog_sources.extend(os.path.join("verilog", d, name) for name in files)
+add_core_files("lm32", ["lm32_cpu.v", "lm32_instruction_unit.v", "lm32_decoder.v",
+       "lm32_load_store_unit.v", "lm32_adder.v", "lm32_addsub.v", "lm32_logic_op.v",
+       "lm32_shifter.v", "lm32_multiplier_spartan6.v", "lm32_mc_arithmetic.v",
+       "lm32_interrupt.v", "lm32_ram.v", "lm32_dp_ram.v", "lm32_icache.v",
+       "lm32_dcache.v", "lm32_top.v", "lm32_debug.v", "lm32_jtag.v", "jtag_cores.v",
+       "jtag_tap_spartan6.v"])
+add_core_dir("uart")
+
+os.system("rm -rf build/*")
+os.chdir("build")
+
+def str2file(filename, contents):
+       """Write the string `contents` to `filename`, replacing any existing file.
+
+       The file is opened in a `with` block so the handle is closed even when
+       the write raises.
+       """
+       with open(filename, 'w') as f:
+               f.write(contents)
+
+# generate source
+(src_verilog, src_ucf) = top.Get()
+str2file("soc.v", src_verilog)
+str2file("soc.ucf", src_ucf)
+# Entries of verilog_sources are written to the project file with a "../"
+# prefix, so the generated file is listed as build/soc.v.
+verilog_sources.append("build/soc.v")
+
+def run_tool(cmd):
+       """Run one toolchain command through the shell; abort the build on failure.
+
+       Raises SystemExit when the command's exit status is non-zero, so that
+       later steps never run on the outputs of a failed step.
+       """
+       status = os.system(cmd)
+       if status != 0:
+               raise SystemExit("build step failed (status %d): %s" % (status, cmd))
+
+# xst
+xst_prj = "".join("verilog work ../" + s + "\n" for s in verilog_sources)
+str2file("soc.prj", xst_prj)
+str2file("soc.xst", """run
+-ifn soc.prj
+-top soc
+-ifmt MIXED
+-opt_mode SPEED
+-opt_level 2
+-resource_sharing no
+-reduce_control_sets auto
+-ofn soc.ngc
+-p xc6slx45-fgg484-2""")
+run_tool("xst -ifn soc.xst")
+
+# ngdbuild
+run_tool("ngdbuild -uc soc.ucf soc.ngc")
+
+# map
+run_tool("map -ol high -w soc.ngd")
+
+# par
+run_tool("par -ol high -w soc.ncd soc-routed.ncd")
+
+# bitgen
+run_tool("bitgen -g LCK_cycle:6 -g Binary:Yes -g INIT_9K:Yes -w soc-routed.ncd soc.bit")
diff --git a/build/.keep_me b/build/.keep_me
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/constraints.py b/constraints.py
new file mode 100644 (file)
index 0000000..7d949d3
--- /dev/null
@@ -0,0 +1,45 @@
+def Get(ns, norflash0, uart0):
+       """Return the UCF constraint text for the NOR flash and UART pads.
+
+       ns is asked for the name of each signal through ns.GetName(); norflash0
+       and uart0 supply the signal objects. The fixed sys_clk / sys_rst
+       constraints are appended after the per-signal lines.
+       """
+       fast8 = "SLEW = FAST | DRIVE = 8"
+       entries = []
+
+       def add(signal, pin, vec=-1, iostandard="LVCMOS33", extra=""):
+               # vec < 0 marks a scalar net; otherwise it is the bit index.
+               entries.append((ns.GetName(signal), vec, pin, iostandard, extra))
+
+       def add_vec(signal, pins, iostandard="LVCMOS33", extra=""):
+               for bit, pin in enumerate(pins):
+                       add(signal, pin, bit, iostandard, extra)
+
+       add_vec(norflash0.adr, ["L22", "L20", "K22", "K21", "J19", "H20", "F22",
+               "F21", "K17", "J17", "E22", "E20", "H18", "H19", "F20",
+               "G19", "C22", "C20", "D22", "D21", "F19", "F18", "D20", "D19"],
+               extra=fast8)
+       add_vec(norflash0.d, ["AA20", "U14", "U13", "AA6", "AB6", "W4", "Y4", "Y7",
+               "AA2", "AB2", "V15", "AA18", "AB18", "Y13", "AA12", "AB12"],
+               extra=fast8 + " | PULLDOWN")
+       for signal, pin in ((norflash0.oe_n, "M22"), (norflash0.we_n, "N20"),
+                       (norflash0.ce_n, "M21"), (norflash0.rst_n, "P22")):
+               add(signal, pin, extra=fast8)
+
+       add(uart0.tx, "L17", extra="SLEW = SLOW")
+       add(uart0.rx, "K18", extra="PULLUP")
+
+       lines = []
+       for name, vec, pin, iostandard, extra in entries:
+               net = name if vec < 0 else name + "(" + str(vec) + ")"
+               fields = ["NET \"" + net + "\" LOC = " + pin, "IOSTANDARD = " + iostandard]
+               if extra:
+                       fields.append(extra)
+               lines.append(" | ".join(fields) + ";\n")
+
+       return "".join(lines) + """
+NET "sys_clk" LOC = AB11 | IOSTANDARD = LVCMOS33;
+NET "sys_clk" TNM_NET = "GRPclk50";
+TIMESPEC "TSclk50" = PERIOD "GRPclk50" 20 ns HIGH 50%;
+
+NET "sys_rst" LOC = AA4 | IOSTANDARD = LVCMOS33;
+       """
diff --git a/milkymist/__init__.py b/milkymist/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/milkymist/lm32/__init__.py b/milkymist/lm32/__init__.py
new file mode 100644 (file)
index 0000000..3a26c42
--- /dev/null
@@ -0,0 +1,49 @@
+from migen.fhdl import structure as f
+from migen.bus import wishbone
+
+class Inst:
+       """Wraps the `lm32_top` Verilog module and exposes two Wishbone masters."""
+       def __init__(self):
+               self.ibus = wishbone.Master("lm32i")
+               self.dbus = wishbone.Master("lm32d")
+               f.Declare(self, "interrupt", f.BV(32))
+               # ext_break is declared but not connected to the instance below.
+               f.Declare(self, "ext_break")
+               ibus, dbus = self.ibus, self.dbus
+               # First port list handed to f.Instance (the *_O ports).
+               outs = [
+                       ("I_ADR_O", ibus.adr_o),
+                       ("I_DAT_O", ibus.dat_o),
+                       ("I_SEL_O", ibus.sel_o),
+                       ("I_CYC_O", ibus.cyc_o),
+                       ("I_STB_O", ibus.stb_o),
+                       ("I_WE_O", ibus.we_o),
+                       ("I_CTI_O", ibus.cti_o),
+                       ("I_LOCK_O", f.BV(1)),
+                       ("I_BTE_O", ibus.bte_o),
+                       ("D_ADR_O", dbus.adr_o),
+                       ("D_DAT_O", dbus.dat_o),
+                       ("D_SEL_O", dbus.sel_o),
+                       ("D_CYC_O", dbus.cyc_o),
+                       ("D_STB_O", dbus.stb_o),
+                       ("D_WE_O", dbus.we_o),
+                       ("D_CTI_O", dbus.cti_o),
+                       ("D_LOCK_O", f.BV(1)),
+                       ("D_BTE_O", dbus.bte_o),
+               ]
+               # Second port list; GetFragment looks entries up via self._inst.ins.
+               ins = [
+                       ("interrupt", self.interrupt),
+                       ("I_DAT_I", ibus.dat_i),
+                       ("I_ACK_I", ibus.ack_i),
+                       ("I_ERR_I", ibus.err_i),
+                       ("I_RTY_I", f.BV(1)),
+                       ("D_DAT_I", dbus.dat_i),
+                       ("D_ACK_I", dbus.ack_i),
+                       ("D_ERR_I", dbus.err_i),
+                       ("D_RTY_I", f.BV(1)),
+               ]
+               # Remaining arguments: an empty third list, then "clk_i", "rst_i"
+               # and "lm32" (presumably clock port, reset port and instance
+               # name - confirm against f.Instance).
+               self._inst = f.Instance("lm32_top", outs, ins, [],
+                       "clk_i", "rst_i", "lm32")
+
+       def GetFragment(self):
+               """Return the fragment holding the instance, with both RTY inputs driven to 0."""
+               ports = self._inst.ins
+               tie_low = [f.Assign(ports[name], 0) for name in ("I_RTY_I", "D_RTY_I")]
+               return f.Fragment(comb=tie_low, instances=[self._inst])
\ No newline at end of file
diff --git a/milkymist/norflash/__init__.py b/milkymist/norflash/__init__.py
new file mode 100644 (file)
index 0000000..abec8d8
--- /dev/null
@@ -0,0 +1,31 @@
+from migen.fhdl import structure as f
+from migen.bus import wishbone
+from migen.corelogic import timeline
+from functools import partial
+
+class Inst:
+       """Wishbone slave that reads a 16-bit wide NOR flash in two accesses."""
+       def __init__(self, adr_width, rd_timing):
+               """Declare the flash pads and the read timeline.
+
+               adr_width sets the flash address pad width (adr_width-1 bits) and
+               the upper bound of the Wishbone address slice; rd_timing is the
+               timeline step between the two flash accesses.
+               """
+               self.bus = wishbone.Slave("norflash")
+               for pad, width in (("adr", adr_width-1), ("d", 16)):
+                       f.Declare(self, pad, f.BV(width))
+               for pad in ("oe_n", "we_n", "ce_n", "rst_n"):
+                       f.Declare(self, pad)
+
+               bus = self.bus
+
+               def flash_adr(lsb):
+                       # lsb selects which of the two flash accesses is addressed.
+                       return f.Cat(lsb, bus.adr_i[2:adr_width])
+
+               trigger = bus.cyc_i & bus.stb_i
+               steps = [
+                       (0, [f.Assign(self.adr, flash_adr(0))]),
+                       # first access (lsb 0) is captured into dat_o[16:]
+                       (rd_timing, [
+                               f.Assign(bus.dat_o[16:], self.d),
+                               f.Assign(self.adr, flash_adr(1))]),
+                       # second access (lsb 1) is captured into dat_o[:16], then ack
+                       (2*rd_timing, [
+                               f.Assign(bus.dat_o[:16], self.d),
+                               f.Assign(bus.ack_o, 1)]),
+                       (2*rd_timing+1, [
+                               f.Assign(bus.ack_o, 0)]),
+               ]
+               self.timeline = timeline.Inst(trigger, steps)
+
+       def GetFragment(self):
+               """Return the constant control-pin assignments plus the timeline fragment."""
+               static = [
+                       f.Assign(self.oe_n, 0),
+                       f.Assign(self.we_n, 1),
+                       f.Assign(self.ce_n, 0),
+                       f.Assign(self.rst_n, 1),
+               ]
+               pads = {self.adr, self.d, self.oe_n, self.we_n, self.ce_n, self.rst_n}
+               own = f.Fragment(static, pads=pads)
+               return own + self.timeline.GetFragment()
diff --git a/milkymist/uart/__init__.py b/milkymist/uart/__init__.py
new file mode 100644 (file)
index 0000000..ed85e95
--- /dev/null
@@ -0,0 +1,28 @@
+from migen.fhdl import structure as f
+from migen.bus import csr
+
+class Inst:
+       """Wraps the `uart` Verilog module behind a CSR slave interface."""
+       def __init__(self, csr_addr, clk_freq, baud=115200, break_en_default=f.Constant(0)):
+               """Declare the UART signals and describe the `uart` instance.
+
+               csr_addr is handed to the core as a 4-bit constant; clk_freq, baud
+               and break_en_default are passed through unchanged.
+               """
+               self.bus = csr.Slave("uart")
+               for name in ("tx", "rx", "irq", "brk"):
+                       f.Declare(self, name)
+               bus = self.bus
+               outs = [
+                       ("csr_do", bus.d_o),
+                       ("uart_tx", self.tx),
+                       ("irq", self.irq),
+                       ("break", self.brk),
+               ]
+               ins = [
+                       ("csr_a", bus.a_i),
+                       ("csr_we", bus.we_i),
+                       ("csr_di", bus.d_i),
+                       ("uart_rx", self.rx),
+               ]
+               # Third list given to f.Instance (presumably module parameters).
+               params = [
+                       ("csr_addr", f.Constant(csr_addr, f.BV(4))),
+                       ("clk_freq", clk_freq),
+                       ("baud", baud),
+                       ("break_en_default", break_en_default),
+               ]
+               self._inst = f.Instance("uart", outs, ins, params,
+                       "sys_clk", "sys_rst")
+
+       def GetFragment(self):
+               """Return the fragment holding the instance, with tx and rx as pads."""
+               pads = {self.tx, self.rx}
+               return f.Fragment(instances=[self._inst], pads=pads)
diff --git a/tb/norflash/Makefile b/tb/norflash/Makefile
new file mode 100644 (file)
index 0000000..572800e
--- /dev/null
@@ -0,0 +1,20 @@
+SOURCES=tb_norflash.v norflash.v
+
+all: tb_norflash
+
+# Run the simulation binary built by iverilog.
+isim: tb_norflash
+       ./tb_norflash
+
+# Run the same sources through cver.
+cversim: $(SOURCES)
+       cver $(SOURCES)
+
+# norflash.v is generated by the Python conversion script.
+norflash.v: norflash_conv.py
+       python3 norflash_conv.py > norflash.v
+
+clean:
+       rm -f tb_norflash verilog.log norflash.vcd norflash.v
+
+tb_norflash: $(SOURCES)
+       iverilog -o tb_norflash $(SOURCES)
+
+# List every target that does not create a file of its own name; the
+# simulation target is called "isim" (there is no "sim" target).
+.PHONY: all isim cversim clean
diff --git a/tb/norflash/norflash_conv.py b/tb/norflash/norflash_conv.py
new file mode 100644 (file)
index 0000000..e093d57
--- /dev/null
@@ -0,0 +1,10 @@
+from migen.fhdl import verilog
+from migen.fhdl import structure as f
+from migen.bus import wishbone
+from milkymist import norflash
+
+# Convert a standalone NOR flash controller to Verilog and print it on stdout.
+norflash0 = norflash.Inst(25, 12)
+frag = norflash0.GetFragment()
+bus = norflash0.bus
+# Wishbone signals passed to the converter as the `ios` set.
+wb_ios = {
+       bus.cyc_i, bus.stb_i, bus.we_i, bus.adr_i,
+       bus.sel_i, bus.dat_i, bus.dat_o, bus.ack_o,
+}
+v = verilog.Convert(frag, name="norflash", ios=wb_ios)
+print(v)
diff --git a/tb/norflash/tb_norflash.v b/tb/norflash/tb_norflash.v
new file mode 100644 (file)
index 0000000..02599fa
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Milkymist SoC
+ * Copyright (C) 2007, 2008, 2009, 2010, 2011 Sebastien Bourdeauducq
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+`timescale 1ns / 1ps
+
+// Testbench for the generated norflash module: issues a series of Wishbone
+// reads and prints the address, cycle count and data of each one.
+module tb_norflash();
+
+reg sys_clk;
+reg sys_rst;
+
+reg [31:0] wb_adr_i;
+wire [31:0] wb_dat_o;
+reg wb_cyc_i;
+reg wb_stb_i;
+wire wb_ack_o;
+reg [3:0] wb_sel_i;
+
+wire [23:0] flash_adr;
+wire [15:0] flash_d;
+reg [15:0] flash_do;
+
+// Flash model: 110 time units after the address changes, the data becomes
+// the low 16 address bits plus one.
+always @(flash_adr) #110 flash_do <= flash_adr[15:0] + 16'b1;
+
+// flash_oe_n and flash_we_n are not declared in this module; they are
+// implicit nets created by the port connections below.
+norflash dut(
+       .sys_clk(sys_clk),
+       .sys_rst(sys_rst),
+
+       .wishbone_norflash_adr_i(wb_adr_i),
+       .wishbone_norflash_dat_o(wb_dat_o),
+       .wishbone_norflash_cyc_i(wb_cyc_i),
+       .wishbone_norflash_stb_i(wb_stb_i),
+       .wishbone_norflash_ack_o(wb_ack_o),
+       .wishbone_norflash_sel_i(wb_sel_i),
+
+       .norflash_adr(flash_adr),
+       .norflash_d(flash_d),
+       .norflash_oe_n(flash_oe_n),
+       .norflash_we_n(flash_we_n)
+);
+
+// The tri-state version is disabled; the flash data is driven unconditionally.
+//assign flash_d = flash_oe_n ? 16'bz : flash_do;
+assign flash_d = flash_do;
+
+// Perform one Wishbone read of `address`. The clock is toggled by hand
+// (5 time units high, 5 low) until the DUT raises ack; one extra clock
+// cycle is run after cyc/stb are dropped.
+task wbread;
+       input [31:0] address;
+       integer i;
+       begin
+               wb_adr_i <= address;
+               wb_cyc_i <= 1'b1;
+               wb_stb_i <= 1'b1;
+               
+               // i counts from 1 and is reported as the cycle count below
+               i = 1;
+               while(~wb_ack_o) begin
+                       #5 sys_clk <= 1'b1;
+                       #5 sys_clk <= 1'b0;
+                       i = i + 1;
+               end
+               
+               $display("Read address %h completed in %d cycles, result %h", address, i, wb_dat_o);
+               
+               wb_cyc_i <= 1'b0;
+               wb_stb_i <= 1'b0;
+               
+               /* Let the core release its ack */
+               #5 sys_clk <= 1'b1;
+               #5 sys_clk <= 1'b0;
+       end
+endtask
+
+// Stimulus: one clock cycle with reset asserted, one with reset released,
+// then the sequence of reads.
+initial begin
+       $dumpfile("norflash.vcd");
+       $dumpvars(1, dut);
+
+       sys_rst <= 1'b1;
+       sys_clk <= 1'b0;
+       
+       wb_adr_i <= 32'h00000000;
+       wb_cyc_i <= 1'b0;
+       wb_stb_i <= 1'b0;
+       wb_sel_i <= 4'b1111;
+
+       #5 sys_clk <= 1'b1;
+       #5 sys_clk <= 1'b0;
+       
+       sys_rst <= 1'b0;
+       #5 sys_clk <= 1'b1;
+       #5 sys_clk <= 1'b0;
+       
+       wbread(32'h00000000);
+       wbread(32'h00000004);
+
+       // reads with a single byte lane selected
+       wb_sel_i = 4'b0010;
+       wbread(32'h0000fff1);
+
+       wb_sel_i = 4'b0100;
+       wbread(32'h0000fff2);
+
+       wb_sel_i = 4'b1000;
+       wbread(32'h0000fff3);
+
+       wb_sel_i = 4'b0100;
+       wbread(32'h0000fff0);
+
+       wb_sel_i = 4'b1111;
+       wbread(32'h00000010);
+       // two idle clock cycles before the last read
+       #5 sys_clk = 1'b1;
+       #5 sys_clk = 1'b0;
+       #5 sys_clk = 1'b1;
+       #5 sys_clk = 1'b0;
+       wbread(32'h00000040);
+       
+       $finish;
+end
+
+endmodule
diff --git a/top.py b/top.py
new file mode 100644 (file)
index 0000000..69439a4
--- /dev/null
+++ b/top.py
@@ -0,0 +1,22 @@
+from migen.fhdl import convtools, verilog, autofragment
+from migen.bus import wishbone, csr, wishbone2csr
+from milkymist import lm32, norflash, uart
+import constraints
+
+def Get():
+       """Build the SoC and return (Verilog source, UCF constraints) as two strings.
+
+       The SoC consists of an LM32 CPU, a NOR flash controller, a
+       Wishbone-to-CSR bridge and a UART on the CSR bus.
+       """
+       cpu0 = lm32.Inst()
+       norflash0 = norflash.Inst(25, 12)
+       wishbone2csr0 = wishbone2csr.Inst()
+       # Both CPU buses are masters; the flash and the CSR bridge are slaves.
+       # NOTE(review): the 0 and 3 are presumably address-decode values for the
+       # slaves - confirm against wishbone.InterconnectShared.
+       wishbonecon0 = wishbone.InterconnectShared(
+               [cpu0.ibus, cpu0.dbus],
+               [(0, norflash0.bus), (3, wishbone2csr0.wishbone)],
+               register=True,
+               offset=1)
+       # CSR address 0, clk_freq of 50*1000*1000, 115200 baud.
+       uart0 = uart.Inst(0, 50*1000*1000, baud=115200)
+       csrcon0 = csr.Interconnect(wishbone2csr0.csr, [uart0.bus])
+       
+       # NOTE(review): FromLocal() presumably collects fragments from this
+       # function's local variables, which would explain why wishbonecon0 and
+       # csrcon0 are bound to names but never referenced - confirm before
+       # renaming or removing any local.
+       frag = autofragment.FromLocal()
+       # The same namespace is given to the converter and to constraints.Get,
+       # which uses it to look up signal names.
+       vns = convtools.Namespace()
+       src_verilog = verilog.Convert(frag, name="soc", ns=vns)
+       src_ucf = constraints.Get(vns, norflash0, uart0)
+       return (src_verilog, src_ucf)
diff --git a/verilog/lm32/jtag_cores.v b/verilog/lm32/jtag_cores.v
new file mode 100644 (file)
index 0000000..d1a76c8
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Milkymist SoC
+ * Copyright (c) 2010 Michael Walle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// JTAG register interface: an 11-bit shift register (8 data bits plus
+// 3 address bits) attached to the jtag_tap module.
+module jtag_cores (
+    input [7:0] reg_d,
+    input [2:0] reg_addr_d,
+    output reg_update,
+    output [7:0] reg_q,
+    output [2:0] reg_addr_q,
+    output jtck,
+    output jrstn
+);
+
+wire tck;
+wire tdi;
+wire tdo;
+wire shift;
+wire update;
+wire reset;
+
+jtag_tap jtag_tap (
+       .tck(tck),
+       .tdi(tdi),
+       .tdo(tdo),
+       .shift(shift),
+       .update(update),
+       .reset(reset)
+);
+
+reg [10:0] jtag_shift;
+reg [10:0] jtag_latched;
+
+// While shift is high, tdi enters at bit 10 and the register moves towards
+// bit 0; otherwise the register is loaded with {reg_d, reg_addr_d}.
+always @(posedge tck or posedge reset)
+begin
+       if(reset)
+               jtag_shift <= 11'b0;
+       else begin
+               if(shift)
+                       jtag_shift <= {tdi, jtag_shift[10:1]};
+               else
+                       jtag_shift <= {reg_d, reg_addr_d};
+       end
+end
+
+// Bit 0 is shifted out first.
+assign tdo = jtag_shift[0];
+
+// Capture the shift register on the rising edge of reg_update (= update).
+always @(posedge reg_update or posedge reset)
+begin
+       if(reset)
+               jtag_latched <= 11'b0;
+       else
+               jtag_latched <= jtag_shift;
+end
+
+// Latched value split into data [10:3] and address [2:0]; jrstn is the
+// inverted TAP reset.
+assign reg_update = update;
+assign reg_q = jtag_latched[10:3];
+assign reg_addr_q = jtag_latched[2:0];
+assign jtck = tck;
+assign jrstn = ~reset;
+
+endmodule
diff --git a/verilog/lm32/jtag_tap_spartan6.v b/verilog/lm32/jtag_tap_spartan6.v
new file mode 100644 (file)
index 0000000..71b6879
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Milkymist SoC
+ * Copyright (c) 2010 Michael Walle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Spartan-6 implementation of jtag_tap, built on the BSCAN_SPARTAN6
+// primitive (JTAG_CHAIN 1).
+module jtag_tap(
+       output tck,
+       output tdi,
+       input tdo,
+       output shift,
+       output update,
+       output reset
+);
+
+wire g_shift;
+wire g_update;
+// Declared explicitly: sel is read by the assigns below, before the bscan
+// instance that drives it, so it must not rely on an implicit net.
+wire sel;
+
+// Gate the primitive's SHIFT and UPDATE outputs with its SEL output.
+assign shift = g_shift & sel;
+assign update = g_update & sel;
+
+BSCAN_SPARTAN6 #(
+       .JTAG_CHAIN(1)
+) bscan (
+       .CAPTURE(),
+       .DRCK(tck),
+       .RESET(reset),
+       .RUNTEST(),
+       .SEL(sel),
+       .SHIFT(g_shift),
+       .TCK(),
+       .TDI(tdi),
+       .TMS(),
+       .UPDATE(g_update),
+       .TDO(tdo)
+);
+
+endmodule
diff --git a/verilog/lm32/lm32_adder.v b/verilog/lm32/lm32_adder.v
new file mode 100644 (file)
index 0000000..d4fa41d
--- /dev/null
@@ -0,0 +1,136 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_adder.v
+// Title            : Integer adder / subtractor with comparison flag generation 
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_adder (
+    // ----- Inputs -------
+    adder_op_x,
+    adder_op_x_n,
+    operand_0_x,
+    operand_1_x,
+    // ----- Outputs -------
+    adder_result_x,
+    adder_carry_n_x,
+    adder_overflow_x
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input adder_op_x;                                       // Operation to perform, 0 for addition, 1 for subtraction
+input adder_op_x_n;                                     // Inverted version of adder_op_x
+input [`LM32_WORD_RNG] operand_0_x;                     // Operand to add, or subtract from
+input [`LM32_WORD_RNG] operand_1_x;                     // Operand to add, or subtract by
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output [`LM32_WORD_RNG] adder_result_x;                 // Result of addition or subtraction
+wire   [`LM32_WORD_RNG] adder_result_x;
+output adder_carry_n_x;                                 // Inverted carry
+wire   adder_carry_n_x;
+output adder_overflow_x;                                // Indicates if overflow occurred, only valid for subtractions
+reg    adder_overflow_x;
+    
+/////////////////////////////////////////////////////
+// Internal nets and registers
+/////////////////////////////////////////////////////
+
+wire a_sign;                                            // Sign (i.e. positive or negative) of operand 0
+wire b_sign;                                            // Sign of operand 1
+wire result_sign;                                       // Sign of result
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+// adder_op_x drives Cin and its inverse drives Add_Sub of the add/subtract unit
+lm32_addsub addsub (
+    // ----- Inputs -----
+    .DataA          (operand_0_x), 
+    .DataB          (operand_1_x), 
+    .Cin            (adder_op_x), 
+    .Add_Sub        (adder_op_x_n), 
+    // ----- Outputs -----
+    .Result         (adder_result_x), 
+    .Cout           (adder_carry_n_x)
+    );
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+// Extract signs of operands and result
+
+assign a_sign = operand_0_x[`LM32_WORD_WIDTH-1];
+assign b_sign = operand_1_x[`LM32_WORD_WIDTH-1];
+assign result_sign = adder_result_x[`LM32_WORD_WIDTH-1];
+
+// Determine whether an overflow occurred when performing a subtraction
+
+always @(*)
+begin    
+    //  +ve - -ve = -ve -> overflow
+    //  -ve - +ve = +ve -> overflow
+    if  (   (!a_sign & b_sign & result_sign)
+         || (a_sign & !b_sign & !result_sign)
+        )
+        adder_overflow_x = `TRUE;
+    else
+        adder_overflow_x = `FALSE;
+end
+    
+endmodule
+
diff --git a/verilog/lm32/lm32_addsub.v b/verilog/lm32/lm32_addsub.v
new file mode 100644 (file)
index 0000000..2a37ad2
--- /dev/null
@@ -0,0 +1,95 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_addsub.v
+// Title            : PMI adder/subtractor.
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_addsub (
+    // ----- Inputs -------
+    DataA, 
+    DataB, 
+    Cin, 
+    Add_Sub, 
+    // ----- Outputs -------
+    Result, 
+    Cout
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input [31:0] DataA;
+input [31:0] DataB;
+input Cin;
+input Add_Sub;                                          // 1 selects the addition result, otherwise the subtraction result
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output [31:0] Result;
+wire   [31:0] Result;
+output Cout;
+wire   Cout;
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+// Modified for Milkymist: removed non-portable instantiated block
+// Both results are computed 33 bits wide so that bit 32 holds the carry out
+// (addition) or the borrow (subtraction).
+            wire [32:0] tmp_addResult = DataA + DataB + Cin;
+            wire [32:0] tmp_subResult = DataA - DataB - !Cin;   
+   
+// Cout is bit 32 for addition and the inverse of bit 32 for subtraction.
+            assign  Result = (Add_Sub == 1) ? tmp_addResult[31:0] : tmp_subResult[31:0];
+            assign  Cout = (Add_Sub == 1) ? tmp_addResult[32] : !tmp_subResult[32];
+
+endmodule
diff --git a/verilog/lm32/lm32_cpu.v b/verilog/lm32/lm32_cpu.v
new file mode 100644 (file)
index 0000000..dc5be84
--- /dev/null
@@ -0,0 +1,2771 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_cpu.v
+// Title            : Top-level of CPU.
+// Dependencies     : lm32_include.v
+//
+// Version 3.8
+// 1. Feature: Support for dynamically switching EBA to DEBA via a GPIO.
+// 2. Bug: EA now reports instruction that caused the data abort, rather than
+//    next instruction.
+//
+// Version 3.4
+// 1. Bug Fix: In a tight infinite loop (add, sw, bi) incoming interrupts were 
+//    never serviced.
+//    
+// Version 3.3
+// 1. Feature: Support for memory that is tightly coupled to processor core, and 
+//    has a single-cycle access latency (same as caches). Instruction port has
+//    access to a dedicated physically-mapped memory. Data port has access to
+//    a dedicated physically-mapped memory. In order to be able to manipulate
+//    values in both these memories via the debugger, these memories also
+//    interface with the data port of LM32.
+// 2. Feature: Extended Configuration Register
+// 3. Bug Fix: Removed port names that conflict with keywords reserved in System-
+//    Verilog.
+//
+// Version 3.2
+// 1. Bug Fix: Single-stepping a load/store to invalid address causes debugger to
+//    hang. At the same time CPU fails to register data bus error exception. Bug
+//    is caused because (a) data bus error exception occurs after load/store has
+//    passed X stage and next sequential instruction (e.g., brk) is already in X
+//    stage, and (b) data bus error exception had lower priority than, say, brk
+//    exception.
+// 2. Bug Fix: If a brk (or scall/eret/bret) sequentially follows a load/store to
+//    invalid location, CPU will fail to register data bus error exception. The
+//    solution is to stall scall/eret/bret/brk instructions in D pipeline stage
+//    until load/store has completed.
+// 3. Feature: Enable precise identification of load/store that causes seg fault.
+// 4. SYNC resets used for register file when implemented in EBRs.
+//
+// Version 3.1
+// 1. Feature: LM32 Register File can now be mapped in to on-chip block RAM (EBR)
+//    instead of distributed memory by enabling the option in LM32 GUI. 
+// 2. Feature: LM32 also adds a static branch predictor to improve branch 
+//    performance. All immediate-based forward-pointing branches are predicted 
+//    not-taken. All immediate-based backward-pointing branches are predicted taken.
+// 
+// Version 7.0SP2, 3.0
+// No Change
+//
+// Version 6.1.17
+// Initial Release
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_cpu (
+    // ----- Inputs -------
+    clk_i,
+`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE
+    clk_n_i,
+`endif    
+    rst_i,
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+    at_debug,
+ `endif
+`endif
+    // From external devices
+`ifdef CFG_INTERRUPTS_ENABLED
+    interrupt,
+`endif
+    // From user logic
+`ifdef CFG_USER_ENABLED
+    user_result,
+    user_complete,
+`endif     
+`ifdef CFG_JTAG_ENABLED
+    // From JTAG
+    jtag_clk,
+    jtag_update, 
+    jtag_reg_q,
+    jtag_reg_addr_q,
+`endif
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+    ext_break,
+`endif
+`ifdef CFG_IWB_ENABLED
+    // Instruction Wishbone master
+    I_DAT_I,
+    I_ACK_I,
+    I_ERR_I,
+    I_RTY_I,
+`endif
+    // Data Wishbone master
+    D_DAT_I,
+    D_ACK_I,
+    D_ERR_I,
+    D_RTY_I,
+    // ----- Outputs -------
+`ifdef CFG_TRACE_ENABLED
+    trace_pc,
+    trace_pc_valid,
+    trace_exception,
+    trace_eid,
+    trace_eret,
+`ifdef CFG_DEBUG_ENABLED
+    trace_bret,
+`endif
+`endif
+`ifdef CFG_JTAG_ENABLED
+    jtag_reg_d,
+    jtag_reg_addr_d,
+`endif
+`ifdef CFG_USER_ENABLED    
+    user_valid,
+    user_opcode,
+    user_operand_0,
+    user_operand_1,
+`endif    
+`ifdef CFG_IWB_ENABLED
+    // Instruction Wishbone master
+    I_DAT_O,
+    I_ADR_O,
+    I_CYC_O,
+    I_SEL_O,
+    I_STB_O,
+    I_WE_O,
+    I_CTI_O,
+    I_LOCK_O,
+    I_BTE_O,
+`endif
+    // Data Wishbone master
+    D_DAT_O,
+    D_ADR_O,
+    D_CYC_O,
+    D_SEL_O,
+    D_STB_O,
+    D_WE_O,
+    D_CTI_O,
+    D_LOCK_O,
+    D_BTE_O
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter eba_reset = `CFG_EBA_RESET;                           // Reset value for EBA CSR
+`ifdef CFG_DEBUG_ENABLED
+parameter deba_reset = `CFG_DEBA_RESET;                         // Reset value for DEBA CSR
+`endif
+
+`ifdef CFG_ICACHE_ENABLED
+parameter icache_associativity = `CFG_ICACHE_ASSOCIATIVITY;     // Associativity of the cache (Number of ways)
+parameter icache_sets = `CFG_ICACHE_SETS;                       // Number of sets
+parameter icache_bytes_per_line = `CFG_ICACHE_BYTES_PER_LINE;   // Number of bytes per cache line
+parameter icache_base_address = `CFG_ICACHE_BASE_ADDRESS;       // Base address of cachable memory
+parameter icache_limit = `CFG_ICACHE_LIMIT;                     // Limit (highest address) of cachable memory
+`else
+parameter icache_associativity = 1;    
+parameter icache_sets = 512;                      
+parameter icache_bytes_per_line = 16;  
+parameter icache_base_address = 0;      
+parameter icache_limit = 0;                    
+`endif
+
+`ifdef CFG_DCACHE_ENABLED
+parameter dcache_associativity = `CFG_DCACHE_ASSOCIATIVITY;     // Associativity of the cache (Number of ways)
+parameter dcache_sets = `CFG_DCACHE_SETS;                       // Number of sets
+parameter dcache_bytes_per_line = `CFG_DCACHE_BYTES_PER_LINE;   // Number of bytes per cache line
+parameter dcache_base_address = `CFG_DCACHE_BASE_ADDRESS;       // Base address of cachable memory
+parameter dcache_limit = `CFG_DCACHE_LIMIT;                     // Limit (highest address) of cachable memory
+`else
+parameter dcache_associativity = 1;    
+parameter dcache_sets = 512;                      
+parameter dcache_bytes_per_line = 16;  
+parameter dcache_base_address = 0;      
+parameter dcache_limit = 0;                    
+`endif
+
+`ifdef CFG_DEBUG_ENABLED
+parameter watchpoints = `CFG_WATCHPOINTS;                       // Number of h/w watchpoint CSRs
+`else
+parameter watchpoints = 0;
+`endif
+`ifdef CFG_ROM_DEBUG_ENABLED
+parameter breakpoints = `CFG_BREAKPOINTS;                       // Number of h/w breakpoint CSRs
+`else
+parameter breakpoints = 0;
+`endif
+
+`ifdef CFG_INTERRUPTS_ENABLED
+parameter interrupts = `CFG_INTERRUPTS;                         // Number of interrupts
+`else
+parameter interrupts = 0;
+`endif
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                    // Clock
+`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE
+input clk_n_i;                                  // Inverted clock
+`endif    
+input rst_i;                                    // Reset
+
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+   input at_debug;                              // GPIO input that maps EBA to DEBA
+ `endif
+`endif
+
+`ifdef CFG_INTERRUPTS_ENABLED
+input [`LM32_INTERRUPT_RNG] interrupt;          // Interrupt pins
+`endif
+
+`ifdef CFG_USER_ENABLED
+input [`LM32_WORD_RNG] user_result;             // User-defined instruction result
+input user_complete;                            // User-defined instruction execution is complete
+`endif    
+
+`ifdef CFG_JTAG_ENABLED
+input jtag_clk;                                 // JTAG clock
+input jtag_update;                              // JTAG state machine is in data register update state
+input [`LM32_BYTE_RNG] jtag_reg_q;              
+input [2:0] jtag_reg_addr_q;
+`endif
+
+`ifdef CFG_IWB_ENABLED
+input [`LM32_WORD_RNG] I_DAT_I;                 // Instruction Wishbone interface read data
+input I_ACK_I;                                  // Instruction Wishbone interface acknowledgement
+input I_ERR_I;                                  // Instruction Wishbone interface error
+input I_RTY_I;                                  // Instruction Wishbone interface retry
+`endif
+
+input [`LM32_WORD_RNG] D_DAT_I;                 // Data Wishbone interface read data
+input D_ACK_I;                                  // Data Wishbone interface acknowledgement
+input D_ERR_I;                                  // Data Wishbone interface error
+input D_RTY_I;                                  // Data Wishbone interface retry
+
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+input ext_break;
+`endif
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+`ifdef CFG_TRACE_ENABLED
+output [`LM32_PC_RNG] trace_pc;                 // PC to trace
+reg    [`LM32_PC_RNG] trace_pc;
+output trace_pc_valid;                          // Indicates that a new trace PC is valid
+reg    trace_pc_valid;
+output trace_exception;                         // Indicates an exception has occured
+reg    trace_exception;
+output [`LM32_EID_RNG] trace_eid;               // Indicates what type of exception has occured
+reg    [`LM32_EID_RNG] trace_eid;
+output trace_eret;                              // Indicates an eret instruction has been executed
+reg    trace_eret;
+`ifdef CFG_DEBUG_ENABLED
+output trace_bret;                              // Indicates a bret instruction has been executed
+reg    trace_bret;
+`endif
+`endif
+
+`ifdef CFG_JTAG_ENABLED
+output [`LM32_BYTE_RNG] jtag_reg_d;
+wire   [`LM32_BYTE_RNG] jtag_reg_d;
+output [2:0] jtag_reg_addr_d;
+wire   [2:0] jtag_reg_addr_d;
+`endif
+
+`ifdef CFG_USER_ENABLED
+output user_valid;                              // Indicates if user_opcode is valid
+wire   user_valid;
+output [`LM32_USER_OPCODE_RNG] user_opcode;     // User-defined instruction opcode
+reg    [`LM32_USER_OPCODE_RNG] user_opcode;
+output [`LM32_WORD_RNG] user_operand_0;         // First operand for user-defined instruction
+wire   [`LM32_WORD_RNG] user_operand_0;
+output [`LM32_WORD_RNG] user_operand_1;         // Second operand for user-defined instruction
+wire   [`LM32_WORD_RNG] user_operand_1;
+`endif
+
+`ifdef CFG_IWB_ENABLED
+output [`LM32_WORD_RNG] I_DAT_O;                // Instruction Wishbone interface write data
+wire   [`LM32_WORD_RNG] I_DAT_O;
+output [`LM32_WORD_RNG] I_ADR_O;                // Instruction Wishbone interface address
+wire   [`LM32_WORD_RNG] I_ADR_O;
+output I_CYC_O;                                 // Instruction Wishbone interface cycle
+wire   I_CYC_O;
+output [`LM32_BYTE_SELECT_RNG] I_SEL_O;         // Instruction Wishbone interface byte select
+wire   [`LM32_BYTE_SELECT_RNG] I_SEL_O;
+output I_STB_O;                                 // Instruction Wishbone interface strobe
+wire   I_STB_O;
+output I_WE_O;                                  // Instruction Wishbone interface write enable
+wire   I_WE_O;
+output [`LM32_CTYPE_RNG] I_CTI_O;               // Instruction Wishbone interface cycle type 
+wire   [`LM32_CTYPE_RNG] I_CTI_O;
+output I_LOCK_O;                                // Instruction Wishbone interface lock bus
+wire   I_LOCK_O;
+output [`LM32_BTYPE_RNG] I_BTE_O;               // Instruction Wishbone interface burst type 
+wire   [`LM32_BTYPE_RNG] I_BTE_O;
+`endif
+
+output [`LM32_WORD_RNG] D_DAT_O;                // Data Wishbone interface write data
+wire   [`LM32_WORD_RNG] D_DAT_O;
+output [`LM32_WORD_RNG] D_ADR_O;                // Data Wishbone interface address
+wire   [`LM32_WORD_RNG] D_ADR_O;
+output D_CYC_O;                                 // Data Wishbone interface cycle
+wire   D_CYC_O;
+output [`LM32_BYTE_SELECT_RNG] D_SEL_O;         // Data Wishbone interface byte select
+wire   [`LM32_BYTE_SELECT_RNG] D_SEL_O;
+output D_STB_O;                                 // Data Wishbone interface strobe
+wire   D_STB_O;
+output D_WE_O;                                  // Data Wishbone interface write enable
+wire   D_WE_O;
+output [`LM32_CTYPE_RNG] D_CTI_O;               // Data Wishbone interface cycle type 
+wire   [`LM32_CTYPE_RNG] D_CTI_O;
+output D_LOCK_O;                                // Data Wishbone interface lock bus
+wire   D_LOCK_O;
+output [`LM32_BTYPE_RNG] D_BTE_O;               // Data Wishbone interface burst type 
+wire   [`LM32_BTYPE_RNG] D_BTE_O;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+// Pipeline registers
+
+`ifdef LM32_CACHE_ENABLED
+reg valid_a;                                    // Instruction in A stage is valid
+`endif
+reg valid_f;                                    // Instruction in F stage is valid
+reg valid_d;                                    // Instruction in D stage is valid
+reg valid_x;                                    // Instruction in X stage is valid
+reg valid_m;                                    // Instruction in M stage is valid
+reg valid_w;                                    // Instruction in W stage is valid
+   
+wire q_x;
+wire [`LM32_WORD_RNG] immediate_d;              // Immediate operand
+wire load_d;                                    // Indicates a load instruction
+reg load_x;                                     
+reg load_m;
+wire load_q_x;
+wire store_q_x;
+wire store_d;                                   // Indicates a store instruction
+reg store_x;
+reg store_m;
+wire [`LM32_SIZE_RNG] size_d;                   // Size of load/store (byte, hword, word)
+reg [`LM32_SIZE_RNG] size_x;
+wire branch_d;                                  // Indicates a branch instruction
+wire branch_predict_d;                          // Indicates a branch is predicted
+wire branch_predict_taken_d;                    // Indicates a branch is predicted taken
+wire [`LM32_PC_RNG] branch_predict_address_d;   // Address to which predicted branch jumps
+wire [`LM32_PC_RNG] branch_target_d;
+wire bi_unconditional;
+wire bi_conditional;
+reg branch_x;                                   
+reg branch_predict_x;
+reg branch_predict_taken_x;
+reg branch_m;
+reg branch_predict_m;
+reg branch_predict_taken_m;
+wire branch_mispredict_taken_m;                 // Indicates a branch was mispredicted as taken
+wire branch_flushX_m;                           // Indicates that instruction in X stage must be squashed
+wire branch_reg_d;                              // Branch to register or immediate
+wire [`LM32_PC_RNG] branch_offset_d;            // Branch offset for immediate branches
+reg [`LM32_PC_RNG] branch_target_x;             // Address to branch to
+reg [`LM32_PC_RNG] branch_target_m;
+wire [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0_d; // Which result should be selected in D stage for operand 0
+wire [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1_d; // Which result should be selected in D stage for operand 1
+
+wire x_result_sel_csr_d;                        // Select X stage result from CSRs
+reg x_result_sel_csr_x;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+wire x_result_sel_mc_arith_d;                   // Select X stage result from multi-cycle arithmetic unit
+reg x_result_sel_mc_arith_x;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT    
+wire x_result_sel_shift_d;                      // Select X stage result from shifter
+reg x_result_sel_shift_x;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+wire x_result_sel_sext_d;                       // Select X stage result from sign-extend logic
+reg x_result_sel_sext_x;
+`endif
+wire x_result_sel_logic_d;                      // Select X stage result from logic op unit
+reg x_result_sel_logic_x;
+`ifdef CFG_USER_ENABLED
+wire x_result_sel_user_d;                       // Select X stage result from user-defined logic
+reg x_result_sel_user_x;
+`endif
+wire x_result_sel_add_d;                        // Select X stage result from adder
+reg x_result_sel_add_x;
+wire m_result_sel_compare_d;                    // Select M stage result from comparison logic
+reg m_result_sel_compare_x;
+reg m_result_sel_compare_m;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+wire m_result_sel_shift_d;                      // Select M stage result from shifter
+reg m_result_sel_shift_x;
+reg m_result_sel_shift_m;
+`endif
+wire w_result_sel_load_d;                       // Select W stage result from load/store unit
+reg w_result_sel_load_x;
+reg w_result_sel_load_m;
+reg w_result_sel_load_w;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+wire w_result_sel_mul_d;                        // Select W stage result from multiplier
+reg w_result_sel_mul_x;
+reg w_result_sel_mul_m;
+reg w_result_sel_mul_w;
+`endif
+wire x_bypass_enable_d;                         // Whether result is bypassable in X stage
+reg x_bypass_enable_x;                          
+wire m_bypass_enable_d;                         // Whether result is bypassable in M stage
+reg m_bypass_enable_x;                          
+reg m_bypass_enable_m;
+wire sign_extend_d;                             // Whether to sign-extend or zero-extend
+reg sign_extend_x;
+wire write_enable_d;                            // Register file write enable
+reg write_enable_x;
+wire write_enable_q_x;
+reg write_enable_m;
+wire write_enable_q_m;
+reg write_enable_w;
+wire write_enable_q_w;
+wire read_enable_0_d;                           // Register file read enable 0
+wire [`LM32_REG_IDX_RNG] read_idx_0_d;          // Register file read index 0
+wire read_enable_1_d;                           // Register file read enable 1
+wire [`LM32_REG_IDX_RNG] read_idx_1_d;          // Register file read index 1
+wire [`LM32_REG_IDX_RNG] write_idx_d;           // Register file write index
+reg [`LM32_REG_IDX_RNG] write_idx_x;            
+reg [`LM32_REG_IDX_RNG] write_idx_m;
+reg [`LM32_REG_IDX_RNG] write_idx_w;
+wire [`LM32_CSR_RNG] csr_d;                     // CSR read/write index
+reg  [`LM32_CSR_RNG] csr_x;                  
+wire [`LM32_CONDITION_RNG] condition_d;         // Branch condition
+reg [`LM32_CONDITION_RNG] condition_x;          
+`ifdef CFG_DEBUG_ENABLED
+wire break_d;                                   // Indicates a break instruction
+reg break_x;                                    
+`endif
+wire scall_d;                                   // Indicates a scall instruction
+reg scall_x;    
+wire eret_d;                                    // Indicates an eret instruction
+reg eret_x;
+wire eret_q_x;
+reg eret_m;
+`ifdef CFG_TRACE_ENABLED
+reg eret_w;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+wire bret_d;                                    // Indicates a bret instruction
+reg bret_x;
+wire bret_q_x;
+reg bret_m;
+`ifdef CFG_TRACE_ENABLED
+reg bret_w;
+`endif
+`endif
+wire csr_write_enable_d;                        // CSR write enable
+reg csr_write_enable_x;
+wire csr_write_enable_q_x;
+`ifdef CFG_USER_ENABLED
+wire [`LM32_USER_OPCODE_RNG] user_opcode_d;     // User-defined instruction opcode
+`endif
+
+`ifdef CFG_BUS_ERRORS_ENABLED
+wire bus_error_d;                               // Indicates a bus error occurred while fetching the instruction in this pipeline stage
+reg bus_error_x;
+reg data_bus_error_exception_m;
+reg [`LM32_PC_RNG] memop_pc_w;
+`endif
+
+reg [`LM32_WORD_RNG] d_result_0;                // Result of instruction in D stage (operand 0)
+reg [`LM32_WORD_RNG] d_result_1;                // Result of instruction in D stage (operand 1)
+reg [`LM32_WORD_RNG] x_result;                  // Result of instruction in X stage
+reg [`LM32_WORD_RNG] m_result;                  // Result of instruction in M stage
+reg [`LM32_WORD_RNG] w_result;                  // Result of instruction in W stage
+
+reg [`LM32_WORD_RNG] operand_0_x;               // Operand 0 for X stage instruction
+reg [`LM32_WORD_RNG] operand_1_x;               // Operand 1 for X stage instruction
+reg [`LM32_WORD_RNG] store_operand_x;           // Data read from register to store
+reg [`LM32_WORD_RNG] operand_m;                 // Operand for M stage instruction
+reg [`LM32_WORD_RNG] operand_w;                 // Operand for W stage instruction
+
+// To/from register file
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+reg [`LM32_WORD_RNG] reg_data_live_0;          
+reg [`LM32_WORD_RNG] reg_data_live_1;  
+reg use_buf;                                    // Whether to use reg_data_live or reg_data_buf
+reg [`LM32_WORD_RNG] reg_data_buf_0;
+reg [`LM32_WORD_RNG] reg_data_buf_1;
+`endif
+`ifdef LM32_EBR_REGISTER_FILE
+`else
+reg [`LM32_WORD_RNG] registers[0:(1<<`LM32_REG_IDX_WIDTH)-1];   // Register file
+`endif
+wire [`LM32_WORD_RNG] reg_data_0;               // Register file read port 0 data         
+wire [`LM32_WORD_RNG] reg_data_1;               // Register file read port 1 data
+reg [`LM32_WORD_RNG] bypass_data_0;             // Register value 0 after bypassing
+reg [`LM32_WORD_RNG] bypass_data_1;             // Register value 1 after bypassing
+wire reg_write_enable_q_w;
+
+reg interlock;                                  // Indicates pipeline should be stalled because of a read-after-write hazard
+
+wire stall_a;                                   // Stall instruction in A pipeline stage
+wire stall_f;                                   // Stall instruction in F pipeline stage
+wire stall_d;                                   // Stall instruction in D pipeline stage
+wire stall_x;                                   // Stall instruction in X pipeline stage
+wire stall_m;                                   // Stall instruction in M pipeline stage
+
+// To/from adder
+wire adder_op_d;                                // Whether to add or subtract
+reg adder_op_x;                                 
+reg adder_op_x_n;                               // Inverted version of adder_op_x
+wire [`LM32_WORD_RNG] adder_result_x;           // Result from adder
+wire adder_overflow_x;                          // Whether a signed overflow occured
+wire adder_carry_n_x;                           // Whether a carry was generated
+
+// To/from logical operations unit
+wire [`LM32_LOGIC_OP_RNG] logic_op_d;           // Which operation to perform
+reg [`LM32_LOGIC_OP_RNG] logic_op_x;            
+wire [`LM32_WORD_RNG] logic_result_x;           // Result of logical operation
+
+`ifdef CFG_SIGN_EXTEND_ENABLED
+// From sign-extension unit
+wire [`LM32_WORD_RNG] sextb_result_x;           // Result of byte sign-extension
+wire [`LM32_WORD_RNG] sexth_result_x;           // Result of half-word sign-extension
+wire [`LM32_WORD_RNG] sext_result_x;            // Result of sign-extension specified by instruction
+`endif
+
+// To/from shifter
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+`ifdef CFG_ROTATE_ENABLED
+wire rotate_d;                                  // Whether we should rotate or shift
+reg rotate_x;                                    
+`endif
+wire direction_d;                               // Which direction to shift in
+reg direction_x;                                        
+wire [`LM32_WORD_RNG] shifter_result_m;         // Result of shifter
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+wire shift_left_d;                              // Indicates whether to perform a left shift or not
+wire shift_left_q_d;
+wire shift_right_d;                             // Indicates whether to perform a right shift or not
+wire shift_right_q_d;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+wire [`LM32_WORD_RNG] shifter_result_x;         // Result of single-bit right shifter
+`endif
+
+// To/from multiplier
+`ifdef LM32_MULTIPLY_ENABLED
+wire [`LM32_WORD_RNG] multiplier_result_w;      // Result from multiplier
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+wire multiply_d;                                // Indicates whether to perform a multiply or not
+wire multiply_q_d;
+`endif
+
+// To/from divider
+`ifdef CFG_MC_DIVIDE_ENABLED
+wire divide_d;                                  // Indicates whether to perform a divider or not
+wire divide_q_d;
+wire modulus_d;
+wire modulus_q_d;
+wire divide_by_zero_x;                          // Indicates an attempt was made to divide by zero
+`endif
+
+// To from multi-cycle arithmetic unit
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+wire mc_stall_request_x;                        // Multi-cycle arithmetic unit stall request
+wire [`LM32_WORD_RNG] mc_result_x;
+`endif
+
+// From CSRs
+`ifdef CFG_INTERRUPTS_ENABLED
+wire [`LM32_WORD_RNG] interrupt_csr_read_data_x;// Data read from interrupt CSRs
+`endif
+wire [`LM32_WORD_RNG] cfg;                      // Configuration CSR
+wire [`LM32_WORD_RNG] cfg2;                     // Extended Configuration CSR
+`ifdef CFG_CYCLE_COUNTER_ENABLED
+reg [`LM32_WORD_RNG] cc;                        // Cycle counter CSR
+`endif
+reg [`LM32_WORD_RNG] csr_read_data_x;           // Data read from CSRs
+
+// To/from instruction unit
+wire [`LM32_PC_RNG] pc_f;                       // PC of instruction in F stage
+wire [`LM32_PC_RNG] pc_d;                       // PC of instruction in D stage
+wire [`LM32_PC_RNG] pc_x;                       // PC of instruction in X stage
+wire [`LM32_PC_RNG] pc_m;                       // PC of instruction in M stage
+wire [`LM32_PC_RNG] pc_w;                       // PC of instruction in W stage
+`ifdef CFG_TRACE_ENABLED
+reg [`LM32_PC_RNG] pc_c;                        // PC of last commited instruction
+`endif
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+wire [`LM32_INSTRUCTION_RNG] instruction_f;     // Instruction in F stage
+`endif
+//pragma attribute instruction_d preserve_signal true
+//pragma attribute instruction_d preserve_driver true
+wire [`LM32_INSTRUCTION_RNG] instruction_d;     // Instruction in D stage
+`ifdef CFG_ICACHE_ENABLED
+wire iflush;                                    // Flush instruction cache
+wire icache_stall_request;                      // Stall pipeline because instruction cache is busy
+wire icache_restart_request;                    // Restart instruction that caused an instruction cache miss
+wire icache_refill_request;                     // Request to refill instruction cache
+wire icache_refilling;                          // Indicates the instruction cache is being refilled
+`endif
+`ifdef CFG_IROM_ENABLED
+wire [`LM32_WORD_RNG] irom_store_data_m;        // Store data to instruction ROM
+wire [`LM32_WORD_RNG] irom_address_xm;          // Address to instruction ROM from load-store unit
+wire [`LM32_WORD_RNG] irom_data_m;              // Load data from instruction ROM
+wire irom_we_xm;                                // Indicates data needs to be written to instruction ROM
+wire irom_stall_request_x;                      // Indicates D stage needs to be stalled on a store to instruction ROM
+`endif
+
+// To/from load/store unit
+`ifdef CFG_DCACHE_ENABLED
+wire dflush_x;                                  // Flush data cache    
+reg dflush_m;                                    
+wire dcache_stall_request;                      // Stall pipeline because data cache is busy
+wire dcache_restart_request;                    // Restart instruction that caused a data cache miss
+wire dcache_refill_request;                     // Request to refill data cache
+wire dcache_refilling;                          // Indicates the data cache is being refilled
+`endif
+wire [`LM32_WORD_RNG] load_data_w;              // Result of a load instruction
+wire stall_wb_load;                             // Stall pipeline because of a load via the data Wishbone interface
+
+// To/from JTAG interface
+`ifdef CFG_JTAG_ENABLED
+`ifdef CFG_JTAG_UART_ENABLED
+wire [`LM32_WORD_RNG] jtx_csr_read_data;        // Read data for JTX CSR
+wire [`LM32_WORD_RNG] jrx_csr_read_data;        // Read data for JRX CSR
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+wire jtag_csr_write_enable;                     // Debugger CSR write enable
+wire [`LM32_WORD_RNG] jtag_csr_write_data;      // Data to write to specified CSR
+wire [`LM32_CSR_RNG] jtag_csr;                  // Which CSR to write
+wire jtag_read_enable;                          
+wire [`LM32_BYTE_RNG] jtag_read_data;
+wire jtag_write_enable;
+wire [`LM32_BYTE_RNG] jtag_write_data;
+wire [`LM32_WORD_RNG] jtag_address;
+wire jtag_access_complete;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+wire jtag_break;                                // Request from debugger to raise a breakpoint
+`endif
+`endif
+
+// Hazard detection
+wire raw_x_0;                                   // RAW hazard between instruction in X stage and read port 0
+wire raw_x_1;                                   // RAW hazard between instruction in X stage and read port 1
+wire raw_m_0;                                   // RAW hazard between instruction in M stage and read port 0
+wire raw_m_1;                                   // RAW hazard between instruction in M stage and read port 1
+wire raw_w_0;                                   // RAW hazard between instruction in W stage and read port 0
+wire raw_w_1;                                   // RAW hazard between instruction in W stage and read port 1
+
+// Control flow
+wire cmp_zero;                                  // Result of comparison is zero
+wire cmp_negative;                              // Result of comparison is negative
+wire cmp_overflow;                              // Comparison produced an overflow
+wire cmp_carry_n;                               // Comparison produced a carry, inverted
+reg condition_met_x;                            // Condition of branch instruction is met
+reg condition_met_m;
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+wire branch_taken_x;                            // Branch is taken in X stage
+`endif
+wire branch_taken_m;                            // Branch is taken in M stage
+
+wire kill_f;                                    // Kill instruction in F stage
+wire kill_d;                                    // Kill instruction in D stage
+wire kill_x;                                    // Kill instruction in X stage
+wire kill_m;                                    // Kill instruction in M stage
+wire kill_w;                                    // Kill instruction in W stage
+
+reg [`LM32_PC_WIDTH+2-1:8] eba;                 // Exception Base Address (EBA) CSR
+`ifdef CFG_DEBUG_ENABLED
+reg [`LM32_PC_WIDTH+2-1:8] deba;                // Debug Exception Base Address (DEBA) CSR
+`endif
+reg [`LM32_EID_RNG] eid_x;                      // Exception ID in X stage
+`ifdef CFG_TRACE_ENABLED
+reg [`LM32_EID_RNG] eid_m;                      // Exception ID in M stage
+reg [`LM32_EID_RNG] eid_w;                      // Exception ID in W stage
+`endif
+
+// Exception flags. With debug support, exceptions are tracked separately as
+// debug and non-debug; without it a single exception flag set is used.
+`ifdef CFG_DEBUG_ENABLED
+`ifdef LM32_SINGLE_STEP_ENABLED
+wire dc_ss;                                     // Is single-step enabled
+`endif
+wire dc_re;                                     // Remap all exceptions
+wire exception_x;                               // An exception occurred in the X stage
+reg exception_m;                                // An instruction that caused an exception is in the M stage
+wire debug_exception_x;                         // Indicates if a debug exception has occurred
+reg debug_exception_m;
+reg debug_exception_w;
+wire debug_exception_q_w;                       // Passed to the interrupt unit (and JTAG block) as the debug exception indication
+wire non_debug_exception_x;                     // Indicates if a non debug exception has occurred
+reg non_debug_exception_m;
+reg non_debug_exception_w;
+wire non_debug_exception_q_w;                   // Passed to the interrupt unit (and JTAG block) as the non debug exception indication
+`else
+wire exception_x;                               // An exception occurred in the X stage
+reg exception_m;
+reg exception_w;
+wire exception_q_w;                             // Passed to the interrupt unit as the exception indication
+`endif
+
+// Individual exception causes; each exists only when the corresponding
+// feature is configured
+`ifdef CFG_DEBUG_ENABLED
+wire reset_exception;                           // Indicates if a reset exception has occurred
+`endif
+`ifdef CFG_INTERRUPTS_ENABLED
+wire interrupt_exception;                       // Indicates if an interrupt exception has occurred
+`endif
+`ifdef CFG_DEBUG_ENABLED
+wire breakpoint_exception;                      // Indicates if a breakpoint exception has occurred
+wire watchpoint_exception;                      // Indicates if a watchpoint exception has occurred
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+wire instruction_bus_error_exception;           // Indicates if an instruction bus error exception has occurred
+wire data_bus_error_exception;                  // Indicates if a data bus error exception has occurred
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+wire divide_by_zero_exception;                  // Indicates if a divide by zero exception has occurred
+`endif
+wire system_call_exception;                     // Indicates if a system call exception has occurred
+
+`ifdef CFG_BUS_ERRORS_ENABLED
+reg data_bus_error_seen;                        // Indicates if a data bus error was seen
+`endif
+
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+// NOTE(review): undocumented in the original; presumably a registered copy of
+// the external break request - confirm against the sequential logic
+reg ext_break_r;
+`endif
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Instantiations
+///////////////////////////////////////////////////// 
+
+// Instruction unit
+// Parameterised with the instruction cache geometry and address window.
+// Receives the pipeline stall/valid/kill controls and the branch targets
+// resolved in the D, X (optional) and M stages; supplies the program counter
+// of every stage (pc_f .. pc_w) and the instruction for the decoder
+// (instruction_d). Cache, IROM, Wishbone and JTAG debug ports are connected
+// only when the corresponding CFG_* option is defined.
+lm32_instruction_unit #(
+    .associativity          (icache_associativity),
+    .sets                   (icache_sets),
+    .bytes_per_line         (icache_bytes_per_line),
+    .base_address           (icache_base_address),
+    .limit                  (icache_limit)
+  ) instruction_unit (
+    // ----- Inputs -------
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+    .at_debug               (at_debug),
+ `endif
+`endif
+    // From pipeline
+    .stall_a                (stall_a),
+    .stall_f                (stall_f),
+    .stall_d                (stall_d),
+    .stall_x                (stall_x),
+    .stall_m                (stall_m),
+    .valid_f                (valid_f),
+    .valid_d                (valid_d),
+    .kill_f                 (kill_f),
+    .branch_predict_taken_d (branch_predict_taken_d),
+    .branch_predict_address_d (branch_predict_address_d),
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+    .branch_taken_x         (branch_taken_x),
+    .branch_target_x        (branch_target_x),
+`endif
+    .exception_m            (exception_m),
+    .branch_taken_m         (branch_taken_m),
+    .branch_mispredict_taken_m (branch_mispredict_taken_m),
+    .branch_target_m        (branch_target_m),
+`ifdef CFG_ICACHE_ENABLED
+    .iflush                 (iflush),
+`endif
+`ifdef CFG_IROM_ENABLED
+    .irom_store_data_m      (irom_store_data_m),
+    .irom_address_xm        (irom_address_xm),
+    .irom_we_xm             (irom_we_xm),
+`endif
+`ifdef CFG_DCACHE_ENABLED
+    .dcache_restart_request (dcache_restart_request),
+    .dcache_refill_request  (dcache_refill_request),
+    .dcache_refilling       (dcache_refilling),
+`endif        
+`ifdef CFG_IWB_ENABLED
+    // From Wishbone
+    .i_dat_i                (I_DAT_I),
+    .i_ack_i                (I_ACK_I),
+    .i_err_i                (I_ERR_I),
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    .jtag_read_enable       (jtag_read_enable),
+    .jtag_write_enable      (jtag_write_enable),
+    .jtag_write_data        (jtag_write_data),
+    .jtag_address           (jtag_address),
+`endif
+    // ----- Outputs -------
+    // To pipeline
+    .pc_f                   (pc_f),
+    .pc_d                   (pc_d),
+    .pc_x                   (pc_x),
+    .pc_m                   (pc_m),
+    .pc_w                   (pc_w),
+`ifdef CFG_ICACHE_ENABLED
+    .icache_stall_request   (icache_stall_request),
+    .icache_restart_request (icache_restart_request),
+    .icache_refill_request  (icache_refill_request),
+    .icache_refilling       (icache_refilling),
+`endif
+`ifdef CFG_IROM_ENABLED
+    .irom_data_m            (irom_data_m),
+`endif
+`ifdef CFG_IWB_ENABLED
+    // To Wishbone
+    .i_dat_o                (I_DAT_O),
+    .i_adr_o                (I_ADR_O),
+    .i_cyc_o                (I_CYC_O),
+    .i_sel_o                (I_SEL_O),
+    .i_stb_o                (I_STB_O),
+    .i_we_o                 (I_WE_O),
+    .i_cti_o                (I_CTI_O),
+    .i_lock_o               (I_LOCK_O),
+    .i_bte_o                (I_BTE_O),
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    .jtag_read_data         (jtag_read_data),
+    .jtag_access_complete   (jtag_access_complete),
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+    .bus_error_d            (bus_error_d),
+`endif
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+    // The F stage instruction is exported so the register file can be read
+    // using indices taken directly from instruction_f
+    .instruction_f          (instruction_f),
+`endif
+    // Kept last and unconditional so the port list never ends on a comma
+    .instruction_d          (instruction_d)
+    );
+
+// Instruction decoder
+// Takes the D stage instruction word and produces the D stage control
+// signals: result selects, register file indices and enables, immediate,
+// branch offset and per-instruction-class flags. No clock or reset is
+// connected. Outputs for optional features are connected only when the
+// corresponding CFG_* / LM32_* option is defined.
+lm32_decoder decoder (
+    // ----- Inputs -------
+    .instruction            (instruction_d),
+    // ----- Outputs -------
+    .d_result_sel_0         (d_result_sel_0_d),
+    .d_result_sel_1         (d_result_sel_1_d),
+    .x_result_sel_csr       (x_result_sel_csr_d),
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+    .x_result_sel_mc_arith  (x_result_sel_mc_arith_d),
+`endif
+`ifdef LM32_NO_BARREL_SHIFT    
+    .x_result_sel_shift     (x_result_sel_shift_d),
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    .x_result_sel_sext      (x_result_sel_sext_d),
+`endif    
+    .x_result_sel_logic     (x_result_sel_logic_d),
+`ifdef CFG_USER_ENABLED
+    .x_result_sel_user      (x_result_sel_user_d),
+`endif
+    .x_result_sel_add       (x_result_sel_add_d),
+    .m_result_sel_compare   (m_result_sel_compare_d),
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    .m_result_sel_shift     (m_result_sel_shift_d),  
+`endif    
+    .w_result_sel_load      (w_result_sel_load_d),
+`ifdef CFG_PL_MULTIPLY_ENABLED
+    .w_result_sel_mul       (w_result_sel_mul_d),
+`endif
+    .x_bypass_enable        (x_bypass_enable_d),
+    .m_bypass_enable        (m_bypass_enable_d),
+    .read_enable_0          (read_enable_0_d),
+    .read_idx_0             (read_idx_0_d),
+    .read_enable_1          (read_enable_1_d),
+    .read_idx_1             (read_idx_1_d),
+    .write_enable           (write_enable_d),
+    .write_idx              (write_idx_d),
+    .immediate              (immediate_d),
+    .branch_offset          (branch_offset_d),
+    .load                   (load_d),
+    .store                  (store_d),
+    .size                   (size_d),
+    .sign_extend            (sign_extend_d),
+    .adder_op               (adder_op_d),
+    .logic_op               (logic_op_d),
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    .direction              (direction_d),
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    .shift_left             (shift_left_d),
+    .shift_right            (shift_right_d),
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    .multiply               (multiply_d),
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+    .divide                 (divide_d),
+    .modulus                (modulus_d),
+`endif
+    .branch                 (branch_d),
+    // Used by the D stage branch prediction logic
+    .bi_unconditional       (bi_unconditional),
+    .bi_conditional         (bi_conditional),
+    .branch_reg             (branch_reg_d),
+    .condition              (condition_d),
+`ifdef CFG_DEBUG_ENABLED
+    .break_opcode           (break_d),
+`endif
+    .scall                  (scall_d),
+    .eret                   (eret_d),
+`ifdef CFG_DEBUG_ENABLED
+    .bret                   (bret_d),
+`endif
+`ifdef CFG_USER_ENABLED
+    .user_opcode            (user_opcode_d),
+`endif
+    // Kept last and unconditional so the port list never ends on a comma
+    .csr_write_enable       (csr_write_enable_d)
+    ); 
+
+// Load/store unit       
+// Parameterised with the data cache geometry and address window. The access
+// address comes from the adder in the X stage and from operand_m / operand_w
+// in the later stages; load data is returned in the W stage (load_data_w).
+// Drives the data Wishbone bus (D_*).
+lm32_load_store_unit #(
+    .associativity          (dcache_associativity),
+    .sets                   (dcache_sets),
+    .bytes_per_line         (dcache_bytes_per_line),
+    .base_address           (dcache_base_address),
+    .limit                  (dcache_limit)
+  ) load_store_unit (
+    // ----- Inputs -------
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),
+    // From pipeline
+    .stall_a                (stall_a),
+    .stall_x                (stall_x),
+    .stall_m                (stall_m),
+    .kill_x                 (kill_x),
+    .kill_m                 (kill_m),
+    .exception_m            (exception_m),
+    .store_operand_x        (store_operand_x),
+    .load_store_address_x   (adder_result_x),
+    .load_store_address_m   (operand_m),
+    // Only the two low-order address bits are passed for the W stage
+    .load_store_address_w   (operand_w[1:0]),
+    .load_x                 (load_x),
+    .store_x                (store_x),
+    .load_q_x               (load_q_x),
+    .store_q_x              (store_q_x),
+    .load_q_m               (load_q_m),
+    .store_q_m              (store_q_m),
+    .sign_extend_x          (sign_extend_x),
+    .size_x                 (size_x),
+`ifdef CFG_DCACHE_ENABLED
+    .dflush                 (dflush_m),
+`endif
+`ifdef CFG_IROM_ENABLED
+    .irom_data_m            (irom_data_m),
+`endif
+    // From Wishbone
+    .d_dat_i                (D_DAT_I),
+    .d_ack_i                (D_ACK_I),
+    .d_err_i                (D_ERR_I),
+    .d_rty_i                (D_RTY_I),
+    // ----- Outputs -------
+    // To pipeline
+`ifdef CFG_DCACHE_ENABLED
+    .dcache_refill_request  (dcache_refill_request),
+    .dcache_restart_request (dcache_restart_request),
+    .dcache_stall_request   (dcache_stall_request),
+    .dcache_refilling       (dcache_refilling),
+`endif    
+`ifdef CFG_IROM_ENABLED
+    .irom_store_data_m      (irom_store_data_m),
+    .irom_address_xm        (irom_address_xm),
+    .irom_we_xm             (irom_we_xm),
+    .irom_stall_request_x   (irom_stall_request_x),
+`endif
+    .load_data_w            (load_data_w),
+    .stall_wb_load          (stall_wb_load),
+    // To Wishbone
+    .d_dat_o                (D_DAT_O),
+    .d_adr_o                (D_ADR_O),
+    .d_cyc_o                (D_CYC_O),
+    .d_sel_o                (D_SEL_O),
+    .d_stb_o                (D_STB_O),
+    .d_we_o                 (D_WE_O),
+    .d_cti_o                (D_CTI_O),
+    .d_lock_o               (D_LOCK_O),
+    .d_bte_o                (D_BTE_O)
+    );      
+       
+// Adder
+// Works on the two X stage operands. Its result feeds the X stage result
+// selection, the condition evaluation, and the load/store and debug units as
+// the access address.
+lm32_adder adder (
+    // ----- Outputs -------
+    .adder_overflow_x       (adder_overflow_x),
+    .adder_carry_n_x        (adder_carry_n_x),
+    .adder_result_x         (adder_result_x),
+    // ----- Inputs -------
+    .operand_1_x            (operand_1_x),
+    .operand_0_x            (operand_0_x),
+    .adder_op_x_n           (adder_op_x_n),
+    .adder_op_x             (adder_op_x)
+    );
+
+// Logic operations
+// Applies the operation selected by logic_op_x to the two X stage operands;
+// logic_result_x is the default source of the X stage result.
+lm32_logic_op logic_op (
+    // ----- Outputs -------
+    .logic_result_x         (logic_result_x),
+    // ----- Inputs -------
+    .operand_1_x            (operand_1_x),
+    .operand_0_x            (operand_0_x),
+    .logic_op_x             (logic_op_x)
+    );
+              
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+// Pipelined barrel-shifter
+// Operands are taken in the X stage and the result is delivered in the M stage
+lm32_shifter shifter (
+    // ----- Outputs -------
+    .shifter_result_m       (shifter_result_m),
+    // ----- Inputs: operands and control -------
+    .operand_0_x            (operand_0_x),
+    .operand_1_x            (operand_1_x),
+    .sign_extend_x          (sign_extend_x),
+    .direction_x            (direction_x),
+    // ----- Inputs: clocking and pipeline control -------
+    .stall_x                (stall_x),
+    .rst_i                  (rst_i),
+    .clk_i                  (clk_i)
+    );
+`endif
+
+`ifdef CFG_PL_MULTIPLY_ENABLED
+// Pipeline fixed-point multiplier
+// Operands are the D stage results and the product is delivered in the W stage
+lm32_multiplier multiplier (
+    // ----- Outputs -------
+    .result                 (multiplier_result_w),
+    // ----- Inputs: operands -------
+    .operand_0              (d_result_0),
+    .operand_1              (d_result_1),
+    // ----- Inputs: clocking and pipeline control -------
+    .stall_m                (stall_m),
+    .stall_x                (stall_x),
+    .rst_i                  (rst_i),
+    .clk_i                  (clk_i)
+    );
+`endif
+
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+// Multi-cycle arithmetic
+// Handles whichever of divide/modulus, multiply and shift are configured as
+// multi-cycle operations. Operands and operation requests are taken in the
+// D stage; the result, the stall request and (for divide) the divide-by-zero
+// flag are produced in the X stage.
+lm32_mc_arithmetic mc_arithmetic (
+    // ----- Inputs -------
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),
+    .stall_d                (stall_d),
+    .kill_x                 (kill_x),
+`ifdef CFG_MC_DIVIDE_ENABLED                  
+    .divide_d               (divide_q_d),
+    .modulus_d              (modulus_q_d),
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED        
+    .multiply_d             (multiply_q_d),
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    .shift_left_d           (shift_left_q_d),
+    .shift_right_d          (shift_right_q_d),
+    .sign_extend_d          (sign_extend_d),
+`endif    
+    .operand_0_d            (d_result_0),
+    .operand_1_d            (d_result_1),
+    // ----- Outputs -------
+    .result_x               (mc_result_x),
+`ifdef CFG_MC_DIVIDE_ENABLED                  
+    .divide_by_zero_x       (divide_by_zero_x),
+`endif
+    .stall_request_x        (mc_stall_request_x)
+    );
+`endif
+              
+`ifdef CFG_INTERRUPTS_ENABLED
+// Interrupt unit
+// Receives the external interrupt lines, the W stage exception indication(s)
+// and the eret/bret qualifiers, and raises interrupt_exception. Its CSRs are
+// accessed from the X stage: csr_x selects the register and operand_1_x
+// carries the write data.
+lm32_interrupt interrupt_unit (
+    // ----- Inputs -------
+    .clk_i                  (clk_i), 
+    .rst_i                  (rst_i),
+    // From external devices
+    .interrupt              (interrupt),
+    // From pipeline
+    .stall_x                (stall_x),
+`ifdef CFG_DEBUG_ENABLED
+    .non_debug_exception    (non_debug_exception_q_w), 
+    .debug_exception        (debug_exception_q_w),
+`else
+    .exception              (exception_q_w), 
+`endif
+    .eret_q_x               (eret_q_x),
+`ifdef CFG_DEBUG_ENABLED
+    .bret_q_x               (bret_q_x),
+`endif
+    .csr                    (csr_x),
+    .csr_write_data         (operand_1_x),
+    .csr_write_enable       (csr_write_enable_q_x),
+    // ----- Outputs -------
+    .interrupt_exception    (interrupt_exception),
+    // To pipeline
+    .csr_read_data          (interrupt_csr_read_data_x)
+    );
+`endif
+
+`ifdef CFG_JTAG_ENABLED
+// JTAG interface
+// Bridges the JTAG register interface (jtag_reg_*) to the pipeline. Depending
+// on configuration it provides the JTAG UART CSRs, debug memory/CSR access
+// requests, and the break and reset requests.
+lm32_jtag jtag (
+    // ----- Inputs -------
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),
+    // From JTAG
+    .jtag_clk               (jtag_clk),
+    .jtag_update            (jtag_update),
+    .jtag_reg_q             (jtag_reg_q),
+    .jtag_reg_addr_q        (jtag_reg_addr_q),
+    // From pipeline
+`ifdef CFG_JTAG_UART_ENABLED
+    .csr                    (csr_x),
+    .csr_write_data         (operand_1_x),
+    .csr_write_enable       (csr_write_enable_q_x),
+    .stall_x                (stall_x),
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    .jtag_read_data         (jtag_read_data),
+    .jtag_access_complete   (jtag_access_complete),
+`endif
+`ifdef CFG_DEBUG_ENABLED
+    // Either kind of exception counts as an exception for the JTAG block
+    .exception_q_w          (debug_exception_q_w || non_debug_exception_q_w),
+`endif    
+    // ----- Outputs -------
+    // To pipeline
+`ifdef CFG_JTAG_UART_ENABLED
+    .jtx_csr_read_data      (jtx_csr_read_data),
+    .jrx_csr_read_data      (jrx_csr_read_data),
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    .jtag_csr_write_enable  (jtag_csr_write_enable),
+    .jtag_csr_write_data    (jtag_csr_write_data),
+    .jtag_csr               (jtag_csr),
+    .jtag_read_enable       (jtag_read_enable),
+    .jtag_write_enable      (jtag_write_enable),
+    .jtag_write_data        (jtag_write_data),
+    .jtag_address           (jtag_address),
+`endif
+`ifdef CFG_DEBUG_ENABLED
+    .jtag_break             (jtag_break),
+    // The JTAG reset request drives the pipeline's reset_exception wire
+    .jtag_reset             (reset_exception),
+`endif
+    // To JTAG 
+    .jtag_reg_d             (jtag_reg_d),
+    .jtag_reg_addr_d        (jtag_reg_addr_d)
+    );
+`endif
+
+`ifdef CFG_DEBUG_ENABLED
+// Debug unit
+// Parameterised with the number of breakpoints and watchpoints. Observes the
+// X stage PC and the load/store address (taken from the adder) and reports
+// bp_match / wp_match. Its CSRs are written from the X stage and, when
+// hardware debug is configured, from JTAG.
+lm32_debug #(
+    .breakpoints            (breakpoints),
+    .watchpoints            (watchpoints)
+  ) hw_debug (
+    // ----- Inputs -------
+    .clk_i                  (clk_i), 
+    .rst_i                  (rst_i),
+    .pc_x                   (pc_x),
+    .load_x                 (load_x),
+    .store_x                (store_x),
+    .load_store_address_x   (adder_result_x),
+    .csr_write_enable_x     (csr_write_enable_q_x),
+    .csr_write_data         (operand_1_x),
+    .csr_x                  (csr_x),
+`ifdef CFG_HW_DEBUG_ENABLED
+    .jtag_csr_write_enable  (jtag_csr_write_enable),
+    .jtag_csr_write_data    (jtag_csr_write_data),
+    .jtag_csr               (jtag_csr),
+`endif
+`ifdef LM32_SINGLE_STEP_ENABLED
+    // Extra pipeline state needed only for single-stepping
+    .eret_q_x               (eret_q_x),
+    .bret_q_x               (bret_q_x),
+    .stall_x                (stall_x),
+    .exception_x            (exception_x),
+    .q_x                    (q_x),
+`ifdef CFG_DCACHE_ENABLED
+    .dcache_refill_request  (dcache_refill_request),
+`endif
+`endif
+    // ----- Outputs -------
+`ifdef LM32_SINGLE_STEP_ENABLED
+    .dc_ss                  (dc_ss),
+`endif
+    .dc_re                  (dc_re),
+    .bp_match               (bp_match),
+    .wp_match               (wp_match)
+    );
+`endif
+
+// Register file
+
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+   /*----------------------------------------------------------------------
+    Register File is implemented using EBRs. There can be three accesses to
+    the register file in each cycle: two reads and one write. On-chip block
+    RAM has two read/write ports. To accommodate three accesses, two on-chip
+    block RAMs are used (each register file "write" is made to both block
+    RAMs).
+    
+    One limitation of the on-chip block RAMs is that one cannot perform a 
+    read and write to same location in a cycle (if this is done, then the
+    data read out is indeterminate).
+    ----------------------------------------------------------------------*/
+   // Raw read data from the two block RAMs
+   wire [31:0] regfile_data_0, regfile_data_1;
+   // Write-back value delayed by one clock
+   reg [31:0]  w_result_d;
+   // Read/write collision flags per read port: registered value and next value
+   reg                regfile_raw_0, regfile_raw_0_nxt;
+   reg                regfile_raw_1, regfile_raw_1_nxt;
+   
+   /*----------------------------------------------------------------------
+    Detect a read/write collision in the current cycle: the register being
+    written back is the same one being read. Read indices are taken from
+    instruction_f[25:21] (port 0) and instruction_f[20:16] (port 1).
+    ----------------------------------------------------------------------*/
+   always @(*)
+     begin
+       // Default: no collision on either read port
+       regfile_raw_0_nxt = 1'b0;
+       regfile_raw_1_nxt = 1'b0;
+
+       // A collision is only possible while a write-back is taking place
+       if (reg_write_enable_q_w)
+         begin
+           if (write_idx_w == instruction_f[25:21])
+             regfile_raw_0_nxt = 1'b1;
+
+           if (write_idx_w == instruction_f[20:16])
+             regfile_raw_1_nxt = 1'b1;
+         end
+     end
+   
+   /*----------------------------------------------------------------------
+    Read port 0 data. Normally the block RAM output is used; if the read
+    performed in the previous cycle collided with a write to the same
+    register, the latched (delayed) write value is used instead.
+    ----------------------------------------------------------------------*/
+   always @(*)
+     begin
+       reg_data_live_0 = regfile_data_0;
+       if (regfile_raw_0)
+         reg_data_live_0 = w_result_d;
+     end
+   
+   /*----------------------------------------------------------------------
+    Read port 1 data. Normally the block RAM output is used; if the read
+    performed in the previous cycle collided with a write to the same
+    register, the latched (delayed) write value is used instead.
+    ----------------------------------------------------------------------*/
+   always @(*)
+     begin
+       reg_data_live_1 = regfile_data_1;
+       if (regfile_raw_1)
+         reg_data_live_1 = w_result_d;
+     end
+   
+   /*----------------------------------------------------------------------
+    Register the value written to the register file together with the
+    collision flags, so both are available in the cycle after the write.
+    ----------------------------------------------------------------------*/
+   always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+     begin
+       if (rst_i == `TRUE)
+         begin
+           w_result_d    <= 32'b0;
+           regfile_raw_1 <= 1'b0;
+           regfile_raw_0 <= 1'b0;
+         end
+       else
+         begin
+           w_result_d    <= w_result;
+           regfile_raw_1 <= regfile_raw_1_nxt;
+           regfile_raw_0 <= regfile_raw_0_nxt;
+         end
+     end
+   
+   /*----------------------------------------------------------------------
+    Register file storage for read port 0: 32 entries of 32 bits, one write
+    port and one read port.
+    ----------------------------------------------------------------------*/
+   // Modified by GSI: removed non-portable RAM instantiation
+   lm32_dp_ram #(
+       // ----- Parameters -----
+       .data_width (32),
+       .addr_width (5),
+       .addr_depth (1<<5)
+   ) reg_0 (
+       // ----- Clock and reset -----
+       .clk_i   (clk_i),
+       .rst_i   (rst_i),
+       // ----- Write port: W stage write-back -----
+       .we_i    (reg_write_enable_q_w),
+       .waddr_i (write_idx_w),
+       .wdata_i (w_result),
+       // ----- Read port: index taken from the F stage instruction -----
+       .raddr_i (instruction_f[25:21]),
+       .rdata_o (regfile_data_0)
+   );
+
+   // Register file storage for read port 1: receives the same writes as
+   // reg_0 and is read with instruction_f[20:16]
+   lm32_dp_ram #(
+       // ----- Parameters -----
+       .data_width (32),
+       .addr_width (5),
+       .addr_depth (1<<5)
+   ) reg_1 (
+       // ----- Clock and reset -----
+       .clk_i   (clk_i),
+       .rst_i   (rst_i),
+       // ----- Write port: W stage write-back -----
+       .we_i    (reg_write_enable_q_w),
+       .waddr_i (write_idx_w),
+       .wdata_i (w_result),
+       // ----- Read port: index taken from the F stage instruction -----
+       .raddr_i (instruction_f[20:16]),
+       .rdata_o (regfile_data_1)
+   );
+`endif
+
+`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE
+   // Register file storage for read port 0, built from a pmi_ram_dp
+   // primitive. Writes are clocked by clk_i and reads by clk_n_i; the read
+   // side is enabled only while the F stage is not stalled.
+   pmi_ram_dp #(
+       // ----- Write side geometry -----
+       .pmi_wr_addr_depth    (1<<5),
+       .pmi_wr_addr_width    (5),
+       .pmi_wr_data_width    (32),
+       // ----- Read side geometry -----
+       .pmi_rd_addr_depth    (1<<5),
+       .pmi_rd_addr_width    (5),
+       .pmi_rd_data_width    (32),
+       // ----- Primitive configuration -----
+       .pmi_regmode          ("noreg"),
+       .pmi_gsr              ("enable"),
+       .pmi_resetmode        ("sync"),
+       .pmi_init_file        ("none"),
+       .pmi_init_file_format ("binary"),
+       .pmi_family           (`LATTICE_FAMILY),
+       .module_type          ("pmi_ram_dp")
+   ) reg_0 (
+       // ----- Write port -----
+       .WrClock   (clk_i),
+       .WrClockEn (`TRUE),
+       .WE        (reg_write_enable_q_w),
+       .WrAddress (write_idx_w),
+       .Data      (w_result),
+       // ----- Read port -----
+       .RdClock   (clk_n_i),
+       .RdClockEn (stall_f == `FALSE),
+       .RdAddress (read_idx_0_d),
+       .Q         (reg_data_0),
+       // ----- Reset -----
+       .Reset     (rst_i)
+   );
+   
+   // Register file storage for read port 1, built from a pmi_ram_dp
+   // primitive. Writes are clocked by clk_i and reads by clk_n_i; the read
+   // side is enabled only while the F stage is not stalled.
+   pmi_ram_dp #(
+       // ----- Write side geometry -----
+       .pmi_wr_addr_depth    (1<<5),
+       .pmi_wr_addr_width    (5),
+       .pmi_wr_data_width    (32),
+       // ----- Read side geometry -----
+       .pmi_rd_addr_depth    (1<<5),
+       .pmi_rd_addr_width    (5),
+       .pmi_rd_data_width    (32),
+       // ----- Primitive configuration -----
+       .pmi_regmode          ("noreg"),
+       .pmi_gsr              ("enable"),
+       .pmi_resetmode        ("sync"),
+       .pmi_init_file        ("none"),
+       .pmi_init_file_format ("binary"),
+       .pmi_family           (`LATTICE_FAMILY),
+       .module_type          ("pmi_ram_dp")
+   ) reg_1 (
+       // ----- Write port -----
+       .WrClock   (clk_i),
+       .WrClockEn (`TRUE),
+       .WE        (reg_write_enable_q_w),
+       .WrAddress (write_idx_w),
+       .Data      (w_result),
+       // ----- Read port -----
+       .RdClock   (clk_n_i),
+       .RdClockEn (stall_f == `FALSE),
+       .RdAddress (read_idx_1_d),
+       .Q         (reg_data_1),
+       // ----- Reset -----
+       .Reset     (rst_i)
+   );
+`endif
+
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+// Positive-edge EBR register file: use_buf chooses the buffered copy of the
+// read data over the live RAM/bypass value
+assign reg_data_1 = use_buf ? reg_data_buf_1 : reg_data_live_1;
+assign reg_data_0 = use_buf ? reg_data_buf_0 : reg_data_live_0;
+`endif
+`ifndef LM32_EBR_REGISTER_FILE
+// No EBR register file: read both ports straight from the registers array
+assign reg_data_1 = registers[read_idx_1_d];
+assign reg_data_0 = registers[read_idx_0_d];
+`endif
+
+// Detect read-after-write hazards: a later stage (X, M or W) is going to
+// write the register that the D stage instruction reads
+// Read port 0
+assign raw_x_0 = (write_enable_q_x == `TRUE) && (write_idx_x == read_idx_0_d);
+assign raw_m_0 = (write_enable_q_m == `TRUE) && (write_idx_m == read_idx_0_d);
+assign raw_w_0 = (write_enable_q_w == `TRUE) && (write_idx_w == read_idx_0_d);
+// Read port 1
+assign raw_x_1 = (write_enable_q_x == `TRUE) && (write_idx_x == read_idx_1_d);
+assign raw_m_1 = (write_enable_q_m == `TRUE) && (write_idx_m == read_idx_1_d);
+assign raw_w_1 = (write_enable_q_w == `TRUE) && (write_idx_w == read_idx_1_d);
+
+// Interlock detection - an interlock is raised when the D stage instruction
+// reads a register that is pending in the X or M stage while bypassing from
+// that stage is disabled
+always @(*)
+begin
+    interlock = `FALSE;
+
+    // Pending write in the X stage that cannot be bypassed
+    if (   (x_bypass_enable_x == `FALSE)
+        && (   ((raw_x_0 == `TRUE) && (read_enable_0_d == `TRUE))
+            || ((raw_x_1 == `TRUE) && (read_enable_1_d == `TRUE))
+           )
+       )
+        interlock = `TRUE;
+
+    // Pending write in the M stage that cannot be bypassed
+    if (   (m_bypass_enable_m == `FALSE)
+        && (   ((raw_m_0 == `TRUE) && (read_enable_0_d == `TRUE))
+            || ((raw_m_1 == `TRUE) && (read_enable_1_d == `TRUE))
+           )
+       )
+        interlock = `TRUE;
+end
+
+// Bypass for reg port 0
+// Starts from the register file value and lets each later stage with a
+// matching pending write override it. The overrides are applied from the
+// oldest stage (W) to the youngest (X), so the X stage has highest priority.
+always @(*)
+begin
+    bypass_data_0 = reg_data_0;
+    if (raw_w_0 == `TRUE)
+        bypass_data_0 = w_result;
+    if (raw_m_0 == `TRUE)
+        bypass_data_0 = m_result;
+    if (raw_x_0 == `TRUE)
+        bypass_data_0 = x_result;
+end
+
+// Bypass for reg port 1
+// Starts from the register file value and lets each later stage with a
+// matching pending write override it. The overrides are applied from the
+// oldest stage (W) to the youngest (X), so the X stage has highest priority.
+always @(*)
+begin
+    bypass_data_1 = reg_data_1;
+    if (raw_w_1 == `TRUE)
+        bypass_data_1 = w_result;
+    if (raw_m_1 == `TRUE)
+        bypass_data_1 = m_result;
+    if (raw_x_1 == `TRUE)
+        bypass_data_1 = x_result;
+end
+
+   /*----------------------------------------------------------------------
+    Static branch prediction, performed in the D stage. Only PC-relative
+    branches are predicted. Unconditional branches are always predicted
+    taken. Conditional branches are predicted taken when they point backward
+    and not-taken when they point forward; the direction is read from
+    bit 15 of the instruction.
+    ----------------------------------------------------------------------*/
+   assign branch_predict_d = bi_conditional | bi_unconditional;
+   assign branch_predict_taken_d =   bi_unconditional ? 1'b1
+                                   : bi_conditional   ? instruction_d[15]
+                                   :                    1'b0;
+
+   // Branch target address: PC of the branch plus its offset
+   assign branch_target_d = branch_offset_d + pc_d;
+
+   // Fetch address: the branch target if the branch is predicted taken,
+   // otherwise the address of the instruction sequentially after the branch
+   assign branch_predict_address_d = branch_predict_taken_d ? branch_target_d : pc_f;
+
+// D stage result selection, operand 0: bit 0 of the select chooses the F
+// stage PC (as a word-aligned byte address) over the bypassed register value
+always @(*)
+begin
+    d_result_0 = d_result_sel_0_d[0] ? {pc_f, 2'b00} : bypass_data_0; 
+end
+
+// D stage result selection, operand 1: zero, bypassed register value or
+// immediate. Any other select value yields X.
+always @(*)
+begin
+    case (d_result_sel_1_d)
+    `LM32_D_RESULT_SEL_1_ZERO:
+        d_result_1 = {`LM32_WORD_WIDTH{1'b0}};
+    `LM32_D_RESULT_SEL_1_REG_1:
+        d_result_1 = bypass_data_1;
+    `LM32_D_RESULT_SEL_1_IMMEDIATE:
+        d_result_1 = immediate_d;
+    default:
+        d_result_1 = {`LM32_WORD_WIDTH{1'bx}};
+    endcase
+end
+
+`ifdef CFG_USER_ENABLED    
+// User-defined instructions receive the X stage operands unchanged
+assign user_operand_1 = operand_1_x;
+assign user_operand_0 = operand_0_x;
+`endif
+
+`ifdef CFG_SIGN_EXTEND_ENABLED
+// Sign-extension of operand 0 from a half-word (bit 15) or a byte (bit 7);
+// size_x picks which of the two is used
+assign sexth_result_x = {{16{operand_0_x[15]}}, operand_0_x[15:0]};
+assign sextb_result_x = {{24{operand_0_x[7]}}, operand_0_x[7:0]};
+assign sext_result_x = (size_x == `LM32_SIZE_BYTE) ? sextb_result_x
+                                                   : sexth_result_x;
+`endif
+
+`ifdef LM32_NO_BARREL_SHIFT
+// Without a barrel-shifter only a single-bit right shift of operand 0 is
+// supported. The vacated top bit is the old top bit when sign_extend_x is
+// set, and zero otherwise.
+assign shifter_result_x = {sign_extend_x & operand_0_x[`LM32_WORD_WIDTH-1],
+                           operand_0_x[`LM32_WORD_WIDTH-1:1]};
+`endif
+
+// Condition evaluation
+// Comparison flags are derived from the X stage operands and the adder outputs
+assign cmp_zero     = operand_0_x == operand_1_x;
+assign cmp_negative = adder_result_x[`LM32_WORD_WIDTH-1];
+assign cmp_overflow = adder_overflow_x;
+assign cmp_carry_n  = adder_carry_n_x;
+
+// Decide whether the condition encoded in condition_x holds
+always @(*)
+begin
+    case (condition_x)
+    // Unconditional encodings
+    `LM32_CONDITION_U1,
+    `LM32_CONDITION_U2:
+        condition_met_x = `TRUE;
+    // Equality
+    `LM32_CONDITION_E:
+        condition_met_x = cmp_zero;
+    `LM32_CONDITION_NE:
+        condition_met_x = !cmp_zero;
+    // Greater-than
+    `LM32_CONDITION_G:
+        condition_met_x = (cmp_negative == cmp_overflow) && !cmp_zero;
+    `LM32_CONDITION_GU:
+        condition_met_x = !cmp_zero && cmp_carry_n;
+    // Greater-than-or-equal
+    `LM32_CONDITION_GE:
+        condition_met_x = cmp_negative == cmp_overflow;
+    `LM32_CONDITION_GEU:
+        condition_met_x = cmp_carry_n;
+    default:
+        condition_met_x = 1'bx;
+    endcase 
+end
+
+// X stage result selection
+// Priority chain of selects: the first asserted select wins, in the order
+// adder, CSR read, then whichever of sign-extend, user, single-bit shift and
+// multi-cycle arithmetic are configured. The logic unit result is the default.
+always @(*)
+begin
+    x_result =   x_result_sel_add_x ? adder_result_x 
+               : x_result_sel_csr_x ? csr_read_data_x
+`ifdef CFG_SIGN_EXTEND_ENABLED
+               : x_result_sel_sext_x ? sext_result_x
+`endif
+`ifdef CFG_USER_ENABLED
+               : x_result_sel_user_x ? user_result
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+               : x_result_sel_shift_x ? shifter_result_x
+`endif
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+               : x_result_sel_mc_arith_x ? mc_result_x
+`endif
+               : logic_result_x;
+end
+
+// M stage result selection
+// A compare yields condition_met_m zero-extended to a full word. Otherwise
+// the pipelined shifter result is used (if configured and selected), or
+// operand_m by default.
+always @(*)
+begin
+    m_result =   m_result_sel_compare_m ? {{`LM32_WORD_WIDTH-1{1'b0}}, condition_met_m}
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+               : m_result_sel_shift_m ? shifter_result_m
+`endif
+               : operand_m; 
+end
+
+// W stage result selection
+// Load data has priority, then the pipelined multiplier result (if
+// configured); operand_w is the default.
+always @(*)
+begin
+    w_result =    w_result_sel_load_w ? load_data_w
+`ifdef CFG_PL_MULTIPLY_ENABLED
+                : w_result_sel_mul_w ? multiplier_result_w
+`endif
+                : operand_w;
+end
+
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+// Indicate when a branch should be taken in X stage: a valid, unconditional
+// branch that was not predicted, in an X stage that is not stalled
+assign branch_taken_x =    (valid_x == `TRUE)
+                        && (branch_x == `TRUE)
+                        && (branch_predict_x == `FALSE)
+                        && (   (condition_x == `LM32_CONDITION_U1)
+                            || (condition_x == `LM32_CONDITION_U2)
+                           )
+                        && (stall_x == `FALSE);
+`endif
+
+// Indicate when a branch should be taken in M stage (exceptions are a type of
+// branch). With the M stage not stalled, this happens on an exception, or on
+// a valid branch that either
+//   - has its condition met but was not predicted taken, or
+//   - was predicted taken but its condition is not met
+assign branch_taken_m =    (stall_m == `FALSE)
+                        && (   (exception_m == `TRUE)
+                            || (   (valid_m == `TRUE)
+                                && (branch_m == `TRUE)
+                                && (   (   (branch_predict_taken_m == `FALSE)
+                                        && (condition_met_m == `TRUE)
+                                       )
+                                    || (   (branch_predict_taken_m == `TRUE)
+                                        && (branch_predict_m == `TRUE)
+                                        && (condition_met_m == `FALSE)
+                                       )
+                                   )
+                               )
+                           );
+
+// Indicate when a branch in M stage is mispredicted as being taken: it was
+// predicted, and predicted taken, but its condition is not met
+assign branch_mispredict_taken_m =    (branch_predict_m == `TRUE)
+                                   && (branch_predict_taken_m == `TRUE)
+                                   && (condition_met_m == `FALSE);
+   
+// Indicate when a branch in M stage will cause flush in X stage
+// Asserted, while the M stage is not stalled, when either:
+//   - a valid branch in M has its condition met (regardless of prediction),
+//     or was predicted taken but its condition is not met, or
+//   - exception_m is set
+// Unlike branch_taken_m, a correctly predicted taken branch also asserts
+// this signal.
+assign branch_flushX_m =    (stall_m == `FALSE)
+                         && (   (   (branch_m == `TRUE) 
+                                 && (valid_m == `TRUE)
+                                && (   (condition_met_m == `TRUE)
+                                    || (   (condition_met_m == `FALSE)
+                                        && (branch_predict_m == `TRUE)
+                                        && (branch_predict_taken_m == `TRUE)
+                                       )
+                                   )
+                               )
+                            || (exception_m == `TRUE)
+                           );
+
+// Generate signal that will kill instructions in each pipeline stage when necessary
+//
+// Summary of the sources feeding each kill signal (terms inside `ifdef only
+// exist when the corresponding option is defined):
+//   kill_f: valid predicted-taken branch in D, branch_taken_m,
+//           branch_taken_x, icache_refill_request, dcache_refill_request
+//   kill_d: branch_taken_m, branch_taken_x, icache_refill_request,
+//           dcache_refill_request
+//   kill_x: branch_flushX_m, dcache_refill_request
+//   kill_m: dcache_refill_request (constant `FALSE without a data cache)
+//   kill_w: dcache_refill_request (constant `FALSE without a data cache)
+assign kill_f =    (   (valid_d == `TRUE)
+                    && (branch_predict_taken_d == `TRUE)
+                  )
+                || (branch_taken_m == `TRUE) 
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+                || (branch_taken_x == `TRUE)
+`endif
+`ifdef CFG_ICACHE_ENABLED
+                || (icache_refill_request == `TRUE) 
+`endif
+`ifdef CFG_DCACHE_ENABLED                
+                || (dcache_refill_request == `TRUE)
+`endif
+                ;
+assign kill_d =    (branch_taken_m == `TRUE) 
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+                || (branch_taken_x == `TRUE)
+`endif
+`ifdef CFG_ICACHE_ENABLED
+                || (icache_refill_request == `TRUE)     
+`endif                
+`ifdef CFG_DCACHE_ENABLED                
+                || (dcache_refill_request == `TRUE)
+`endif
+                ;
+// X is flushed by branch_flushX_m rather than branch_taken_m, so a correctly
+// predicted taken branch in M also kills the instruction in X
+assign kill_x =    (branch_flushX_m == `TRUE) 
+`ifdef CFG_DCACHE_ENABLED                
+                || (dcache_refill_request == `TRUE)
+`endif
+                ;
+assign kill_m =    `FALSE
+`ifdef CFG_DCACHE_ENABLED                
+                || (dcache_refill_request == `TRUE)
+`endif
+                ;                
+assign kill_w =    `FALSE
+`ifdef CFG_DCACHE_ENABLED                
+                || (dcache_refill_request == `TRUE)
+`endif                
+                ;
+
+// Exceptions
+//
+// Each *_exception signal below flags one exception source as seen in the
+// X stage. They are combined into exception_x (and, when CFG_DEBUG_ENABLED
+// is defined, also into debug_exception_x / non_debug_exception_x).
+
+`ifdef CFG_DEBUG_ENABLED
+// Breakpoint: a valid X-stage instruction that is a break or matches a
+// breakpoint (bp_match), or an unqualified JTAG / registered external break
+// request
+assign breakpoint_exception =    (   (   (break_x == `TRUE)
+                                     || (bp_match == `TRUE)
+                                    )
+                                 && (valid_x == `TRUE)
+                                )
+`ifdef CFG_JTAG_ENABLED
+                              || (jtag_break == `TRUE)
+`endif
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+                              || (ext_break_r == `TRUE)
+`endif
+                              ;
+`endif
+
+`ifdef CFG_DEBUG_ENABLED
+// Watchpoint: follows wp_match directly (no valid_x qualification here)
+assign watchpoint_exception = wp_match == `TRUE;
+`endif
+
+`ifdef CFG_BUS_ERRORS_ENABLED
+// Instruction bus error: bus_error_x flag carried by a valid X-stage instruction
+assign instruction_bus_error_exception = (   (bus_error_x == `TRUE)
+                                          && (valid_x == `TRUE)
+                                         );
+// Data bus error: follows the sticky data_bus_error_seen flag
+assign data_bus_error_exception = data_bus_error_seen == `TRUE;
+`endif
+
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign divide_by_zero_exception = divide_by_zero_x == `TRUE;
+`endif
+
+// System call: scall_x; note it is only qualified with valid_x when
+// CFG_BUS_ERRORS_ENABLED is defined
+assign system_call_exception = (   (scall_x == `TRUE)
+`ifdef CFG_BUS_ERRORS_ENABLED
+                                && (valid_x == `TRUE)
+`endif
+                              );
+
+`ifdef CFG_DEBUG_ENABLED
+assign debug_exception_x =  (breakpoint_exception == `TRUE)
+                         || (watchpoint_exception == `TRUE)
+                         ;
+
+// Non-debug exceptions. The interrupt term is suppressed while dc_ss is set
+// (when LM32_SINGLE_STEP_ENABLED is defined) and, when CFG_BUS_ERRORS_ENABLED
+// is defined, while a store is in M (store_q_m) or D_CYC_O is asserted.
+assign non_debug_exception_x = (system_call_exception == `TRUE)
+`ifdef CFG_JTAG_ENABLED
+                            || (reset_exception == `TRUE)
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+                            || (instruction_bus_error_exception == `TRUE)
+                            || (data_bus_error_exception == `TRUE)
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+                            || (divide_by_zero_exception == `TRUE)
+`endif
+`ifdef CFG_INTERRUPTS_ENABLED
+                            || (   (interrupt_exception == `TRUE)
+`ifdef LM32_SINGLE_STEP_ENABLED
+                                && (dc_ss == `FALSE)
+`endif                            
+`ifdef CFG_BUS_ERRORS_ENABLED
+                               && (store_q_m == `FALSE)
+                               && (D_CYC_O == `FALSE)
+`endif
+                               )
+`endif
+                            ;
+
+assign exception_x = (debug_exception_x == `TRUE) || (non_debug_exception_x == `TRUE);
+`else
+// Without CFG_DEBUG_ENABLED there is no debug/non-debug split: exception_x is
+// the OR of the same sources as non_debug_exception_x above, minus
+// reset_exception
+assign exception_x =           (system_call_exception == `TRUE)
+`ifdef CFG_BUS_ERRORS_ENABLED
+                            || (instruction_bus_error_exception == `TRUE)
+                            || (data_bus_error_exception == `TRUE)
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+                            || (divide_by_zero_exception == `TRUE)
+`endif
+`ifdef CFG_INTERRUPTS_ENABLED
+                            || (   (interrupt_exception == `TRUE)
+`ifdef LM32_SINGLE_STEP_ENABLED
+                                && (dc_ss == `FALSE)
+`endif                            
+`ifdef CFG_BUS_ERRORS_ENABLED
+                               && (store_q_m == `FALSE)
+                               && (D_CYC_O == `FALSE)
+`endif
+                               )
+`endif
+                            ;
+`endif
+
+// Exception ID
+// Combinational priority encoder selecting eid_x. With every option defined
+// the order of precedence, highest first, is:
+//   reset, data bus error, breakpoint, instruction bus error, watchpoint,
+//   divide by zero, interrupt (suppressed while dc_ss is set), system call.
+// `LM32_EID_SCALL is the fall-through value, so eid_x reads as SCALL whenever
+// no other source is active.
+// NOTE(review): when both CFG_DEBUG_ENABLED and CFG_BUS_ERRORS_ENABLED are
+// defined, the data bus error test appears twice in the chain; the second
+// occurrence can never be reached in that configuration. It is what places
+// data bus errors ahead of instruction bus errors when CFG_DEBUG_ENABLED is
+// not defined.
+always @(*)
+begin
+`ifdef CFG_DEBUG_ENABLED
+`ifdef CFG_JTAG_ENABLED
+    if (reset_exception == `TRUE)
+        eid_x = `LM32_EID_RESET;
+    else
+`endif     
+`ifdef CFG_BUS_ERRORS_ENABLED
+         if (data_bus_error_exception == `TRUE)
+        eid_x = `LM32_EID_DATA_BUS_ERROR;
+    else
+`endif
+         if (breakpoint_exception == `TRUE)
+        eid_x = `LM32_EID_BREAKPOINT;
+    else
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+         if (data_bus_error_exception == `TRUE)
+        eid_x = `LM32_EID_DATA_BUS_ERROR;
+    else
+         if (instruction_bus_error_exception == `TRUE)
+        eid_x = `LM32_EID_INST_BUS_ERROR;
+    else
+`endif
+`ifdef CFG_DEBUG_ENABLED
+         if (watchpoint_exception == `TRUE)
+        eid_x = `LM32_EID_WATCHPOINT;
+    else 
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+         if (divide_by_zero_exception == `TRUE)
+        eid_x = `LM32_EID_DIVIDE_BY_ZERO;
+    else
+`endif
+`ifdef CFG_INTERRUPTS_ENABLED
+         if (   (interrupt_exception == `TRUE)
+`ifdef LM32_SINGLE_STEP_ENABLED
+             && (dc_ss == `FALSE)
+`endif                            
+            )
+        eid_x = `LM32_EID_INTERRUPT;
+    else
+`endif
+        eid_x = `LM32_EID_SCALL;
+end
+
+// Stall generation
+//
+// Stalls propagate backwards through the pipeline: stall_m feeds stall_x,
+// stall_x feeds stall_d, and stall_d is passed unchanged to stall_f and
+// stall_a.
+
+assign stall_a = (stall_f == `TRUE);
+                
+assign stall_f = (stall_d == `TRUE);
+                
+// D stage stalls when X is stalled, or when a (not killed) D-stage
+// instruction must wait:
+//   - interlock is asserted
+//   - eret/scall (and bus_error_d, break, bret when enabled) wait until no
+//     load or store is in X or M and D_CYC_O is low
+//   - a CSR write waits while a load is in X (this term is not qualified
+//     with kill_d)
+assign stall_d =   (stall_x == `TRUE) 
+                || (   (interlock == `TRUE)
+                    && (kill_d == `FALSE)
+                   ) 
+               || (   (   (eret_d == `TRUE)
+                       || (scall_d == `TRUE)
+`ifdef CFG_BUS_ERRORS_ENABLED
+                       || (bus_error_d == `TRUE)
+`endif
+                      )
+                   && (   (load_q_x == `TRUE)
+                       || (load_q_m == `TRUE)
+                       || (store_q_x == `TRUE)
+                       || (store_q_m == `TRUE)
+                       || (D_CYC_O == `TRUE)
+                      )
+                    && (kill_d == `FALSE)
+                  )
+`ifdef CFG_DEBUG_ENABLED
+               || (   (   (break_d == `TRUE)
+                       || (bret_d == `TRUE)
+                      )
+                   && (   (load_q_x == `TRUE)
+                       || (store_q_x == `TRUE)
+                       || (load_q_m == `TRUE)
+                       || (store_q_m == `TRUE)
+                       || (D_CYC_O == `TRUE)
+                      )
+                    && (kill_d == `FALSE)
+                  )
+`endif                   
+                || (   (csr_write_enable_d == `TRUE)
+                    && (load_q_x == `TRUE)
+                   )                      
+                ;
+                
+// X stage stalls when M is stalled, when a (not killed) multi-cycle
+// arithmetic operation requests it, or on an instruction ROM stall request
+assign stall_x =    (stall_m == `TRUE)
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+                 || (   (mc_stall_request_x == `TRUE)
+                     && (kill_x == `FALSE)
+                    ) 
+`endif
+`ifdef CFG_IROM_ENABLED
+                 // Stall load/store instruction in D stage if there is an ongoing store
+                 // operation to instruction ROM in M stage
+                 || (   (irom_stall_request_x == `TRUE)
+                    && (   (load_d == `TRUE)
+                        || (store_d == `TRUE)
+                       )
+                   )
+`endif
+                 ;
+
+// M stage stalls on stall_wb_load, while a data Wishbone cycle (D_CYC_O) is
+// in progress (always under CFG_SIZE_OVER_SPEED, otherwise only when another
+// memory operation is in X or M), on cache stall requests, on an instruction
+// Wishbone cycle, and while a user-defined instruction has not completed
+assign stall_m =    (stall_wb_load == `TRUE)
+`ifdef CFG_SIZE_OVER_SPEED
+                 || (D_CYC_O == `TRUE)
+`else
+                 || (   (D_CYC_O == `TRUE)
+                     && (   (store_m == `TRUE)
+                        /*
+                         Bug: Following loop does not allow interrupts to be serviced since
+                         either D_CYC_O or store_m is always high during entire duration of
+                         loop.
+                         L1:   addi    r1, r1, 1
+                               sw      (r2,0), r1
+                               bi      L1
+                         
+                         Introduce a single-cycle stall when a wishbone cycle is in progress
+                         and a new store instruction is in Execute stage and an interrupt
+                         exception has occurred. This stall will ensure that D_CYC_O and 
+                         store_m will both be low for one cycle.
+                         */
+`ifdef CFG_INTERRUPTS_ENABLED
+                        || ((store_x == `TRUE) && (interrupt_exception == `TRUE))
+`endif
+                         || (load_m == `TRUE)
+                         || (load_x == `TRUE)
+                        ) 
+                    ) 
+`endif                 
+`ifdef CFG_DCACHE_ENABLED
+                 || (dcache_stall_request == `TRUE)     // Need to stall in case a taken branch is in M stage and data cache is only being flushed, so won't be restarted
+`endif                                    
+`ifdef CFG_ICACHE_ENABLED
+                 || (icache_stall_request == `TRUE)     // Pipeline needs to be stalled otherwise branches may be lost
+                 || ((I_CYC_O == `TRUE) && ((branch_m == `TRUE) || (exception_m == `TRUE))) 
+`else
+`ifdef CFG_IWB_ENABLED
+                 || (I_CYC_O == `TRUE)            
+`endif
+`endif                               
+`ifdef CFG_USER_ENABLED
+                 || (   (user_valid == `TRUE)           // Stall whole pipeline, rather than just X stage, where the instruction is, so we don't have to worry about exceptions (maybe)
+                     && (user_complete == `FALSE)
+                    )
+`endif
+                 ;      
+
+// Qualify state changing control signals
+//
+// q_d / q_x / q_m are "instruction in this stage is valid and not killed"
+// (q_m additionally requires exception_m to be clear). The *_q_* signals are
+// the raw decoded control signals ANDed with the qualifier of their stage.
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+assign q_d = (valid_d == `TRUE) && (kill_d == `FALSE);
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+assign shift_left_q_d = (shift_left_d == `TRUE) && (q_d == `TRUE);
+assign shift_right_q_d = (shift_right_d == `TRUE) && (q_d == `TRUE);
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+assign multiply_q_d = (multiply_d == `TRUE) && (q_d == `TRUE);
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign divide_q_d = (divide_d == `TRUE) && (q_d == `TRUE);
+assign modulus_q_d = (modulus_d == `TRUE) && (q_d == `TRUE);
+`endif
+assign q_x = (valid_x == `TRUE) && (kill_x == `FALSE);
+assign csr_write_enable_q_x = (csr_write_enable_x == `TRUE) && (q_x == `TRUE);
+assign eret_q_x = (eret_x == `TRUE) && (q_x == `TRUE);
+`ifdef CFG_DEBUG_ENABLED
+assign bret_q_x = (bret_x == `TRUE) && (q_x == `TRUE);
+`endif
+// Loads and stores in X are additionally suppressed on a breakpoint match
+// when CFG_DEBUG_ENABLED is defined
+assign load_q_x = (load_x == `TRUE) 
+               && (q_x == `TRUE)
+`ifdef CFG_DEBUG_ENABLED
+               && (bp_match == `FALSE)
+`endif
+                  ;
+assign store_q_x = (store_x == `TRUE) 
+               && (q_x == `TRUE)
+`ifdef CFG_DEBUG_ENABLED
+               && (bp_match == `FALSE)
+`endif
+                  ;
+`ifdef CFG_USER_ENABLED
+assign user_valid = (x_result_sel_user_x == `TRUE) && (q_x == `TRUE);
+`endif                              
+assign q_m = (valid_m == `TRUE) && (kill_m == `FALSE) && (exception_m == `FALSE);
+assign load_q_m = (load_m == `TRUE) && (q_m == `TRUE);
+assign store_q_m = (store_m == `TRUE) && (q_m == `TRUE);
+// W-stage exception flags are qualified with valid_w only
+`ifdef CFG_DEBUG_ENABLED
+assign debug_exception_q_w = ((debug_exception_w == `TRUE) && (valid_w == `TRUE));
+assign non_debug_exception_q_w = ((non_debug_exception_w == `TRUE) && (valid_w == `TRUE));        
+`else
+assign exception_q_w = ((exception_w == `TRUE) && (valid_w == `TRUE));        
+`endif
+// Don't qualify register write enables with kill, as the signal is needed early, and it doesn't matter if the instruction is killed (except for the actual write - but that is handled separately)
+// (write_enable_q_x is still gated by branch_flushX_m, one of the kill_x sources)
+assign write_enable_q_x = (write_enable_x == `TRUE) && (valid_x == `TRUE) && (branch_flushX_m == `FALSE);
+assign write_enable_q_m = (write_enable_m == `TRUE) && (valid_m == `TRUE);
+assign write_enable_q_w = (write_enable_w == `TRUE) && (valid_w == `TRUE);
+// The enable that actually does write the registers needs to be qualified with kill
+assign reg_write_enable_q_w = (write_enable_w == `TRUE) && (kill_w == `FALSE) && (valid_w == `TRUE);
+
+// Configuration (CFG) CSR
+// Read-only word describing which options this core was built with.
+// Fields, listed from most significant to least significant as they appear
+// in the concatenation:
+//   `LM32_REVISION, watchpoints[3:0], breakpoints[3:0], interrupts[5:0],
+//   then one flag per option: JTAG UART, ROM debug, HW debug, debug,
+//   instruction cache, data cache, cycle counter, user instructions,
+//   sign extend, barrel shift, divide, multiply.
+// Assuming `TRUE / `FALSE are single-bit constants (TODO confirm against
+// lm32_include.v), the multiply flag is bit 0, the JTAG UART flag bit 11,
+// interrupts occupies bits 17:12, breakpoints 21:18 and watchpoints 25:22.
+assign cfg = {
+              `LM32_REVISION,
+              watchpoints[3:0],
+              breakpoints[3:0],
+              interrupts[5:0],
+`ifdef CFG_JTAG_UART_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_ROM_DEBUG_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_DEBUG_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_ICACHE_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_DCACHE_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_CYCLE_COUNTER_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_USER_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef LM32_BARREL_SHIFT_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+              `TRUE,
+`else
+              `FALSE,
+`endif
+`ifdef LM32_MULTIPLY_ENABLED 
+              `TRUE
+`else
+              `FALSE
+`endif
+              };
+
+// Second configuration CSR: 30 zero bits followed by the instruction ROM
+// flag (CFG_IROM_ENABLED) and, last, the data RAM flag (CFG_DRAM_ENABLED)
+assign cfg2 = {
+                    30'b0,
+`ifdef CFG_IROM_ENABLED
+                    `TRUE,
+`else
+                    `FALSE,
+`endif
+`ifdef CFG_DRAM_ENABLED
+                    `TRUE
+`else
+                    `FALSE
+`endif
+                    };
+   
+// Cache flush
+// iflush: a valid, not stalled, not killed CSR write to `LM32_CSR_ICC in the
+// D stage, or (with CFG_HW_DEBUG_ENABLED) a JTAG CSR write to the same CSR.
+`ifdef CFG_ICACHE_ENABLED
+assign iflush = (   (csr_write_enable_d == `TRUE) 
+                 && (csr_d == `LM32_CSR_ICC)
+                 && (stall_d == `FALSE)
+                 && (kill_d == `FALSE)
+                 && (valid_d == `TRUE))
+// Added by GSI: needed to flush cache after loading firmware per JTAG
+`ifdef CFG_HW_DEBUG_ENABLED
+             ||
+                (   (jtag_csr_write_enable == `TRUE)
+                && (jtag_csr == `LM32_CSR_ICC))
+`endif
+                ;
+`endif 
+// dflush_x: a qualified CSR write to `LM32_CSR_DCC in the X stage, or (with
+// CFG_HW_DEBUG_ENABLED) a JTAG CSR write to the same CSR. Unlike iflush this
+// is detected in X rather than D and is not gated by a stall signal here.
+`ifdef CFG_DCACHE_ENABLED
+assign dflush_x = (   (csr_write_enable_q_x == `TRUE) 
+                   && (csr_x == `LM32_CSR_DCC))
+// Added by GSI: needed to flush cache after loading firmware per JTAG
+`ifdef CFG_HW_DEBUG_ENABLED
+               ||
+                  (   (jtag_csr_write_enable == `TRUE)
+                  && (jtag_csr == `LM32_CSR_DCC))
+`endif
+                  ;
+`endif 
+
+// Extract CSR index
+// The CSR number of the D-stage instruction is taken from the `LM32_CSR_RNG
+// bits of the first register read index
+assign csr_d = read_idx_0_d[`LM32_CSR_RNG];
+
+// CSR reads
+// Combinational mux selecting the CSR read value for the CSR index in the X
+// stage (csr_x). IE/IM/IP share the value supplied in
+// interrupt_csr_read_data_x. EBA and DEBA are returned with their low 8 bits
+// read as zero. CSRs whose option is not compiled in, and any other index,
+// fall through to the default and read as all-X.
+always @(*)
+begin
+    case (csr_x)
+`ifdef CFG_INTERRUPTS_ENABLED
+    `LM32_CSR_IE,
+    `LM32_CSR_IM,
+    `LM32_CSR_IP:   csr_read_data_x = interrupt_csr_read_data_x;  
+`endif
+`ifdef CFG_CYCLE_COUNTER_ENABLED
+    `LM32_CSR_CC:   csr_read_data_x = cc;
+`endif
+    `LM32_CSR_CFG:  csr_read_data_x = cfg;
+    `LM32_CSR_EBA:  csr_read_data_x = {eba, 8'h00};
+`ifdef CFG_DEBUG_ENABLED
+    `LM32_CSR_DEBA: csr_read_data_x = {deba, 8'h00};
+`endif
+`ifdef CFG_JTAG_UART_ENABLED
+    `LM32_CSR_JTX:  csr_read_data_x = jtx_csr_read_data;  
+    `LM32_CSR_JRX:  csr_read_data_x = jrx_csr_read_data;
+`endif
+    `LM32_CSR_CFG2: csr_read_data_x = cfg2;
+      
+    default:        csr_read_data_x = {`LM32_WORD_WIDTH{1'bx}};
+    endcase
+end
+
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// Exception Base Address (EBA) CSR
+// Holds bits [`LM32_PC_WIDTH+2-1:8] of the exception base address. Loaded
+// from eba_reset on reset, afterwards written either by a JTAG CSR write
+// (when CFG_HW_DEBUG_ENABLED is defined) or by a qualified, non-stalled CSR
+// write in the X stage.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        eba <= eba_reset[`LM32_PC_WIDTH+2-1:8];
+`ifdef CFG_HW_DEBUG_ENABLED
+    // A JTAG write wins over a CPU write arriving in the same cycle
+    else if (   (jtag_csr_write_enable == `TRUE)
+             && (jtag_csr == `LM32_CSR_EBA)
+            )
+        eba <= jtag_csr_write_data[`LM32_PC_WIDTH+2-1:8];
+`endif
+    else if (   (csr_write_enable_q_x == `TRUE)
+             && (csr_x == `LM32_CSR_EBA)
+             && (stall_x == `FALSE)
+            )
+        eba <= operand_1_x[`LM32_PC_WIDTH+2-1:8];
+end
+
+`ifdef CFG_DEBUG_ENABLED
+// Debug Exception Base Address (DEBA) CSR
+// Holds bits [`LM32_PC_WIDTH+2-1:8] of the debug exception base address.
+// Loaded from deba_reset on reset, afterwards written either by a JTAG CSR
+// write (when CFG_HW_DEBUG_ENABLED is defined) or by a qualified,
+// non-stalled CSR write in the X stage.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        deba <= deba_reset[`LM32_PC_WIDTH+2-1:8];
+`ifdef CFG_HW_DEBUG_ENABLED
+    // A JTAG write wins over a CPU write arriving in the same cycle
+    else if (   (jtag_csr_write_enable == `TRUE)
+             && (jtag_csr == `LM32_CSR_DEBA)
+            )
+        deba <= jtag_csr_write_data[`LM32_PC_WIDTH+2-1:8];
+`endif
+    else if (   (csr_write_enable_q_x == `TRUE)
+             && (csr_x == `LM32_CSR_DEBA)
+             && (stall_x == `FALSE)
+            )
+        deba <= operand_1_x[`LM32_PC_WIDTH+2-1:8];
+end
+`endif
+
+// Cycle Counter (CC) CSR
+// Free-running counter: cleared on reset and incremented by one on every
+// clock edge thereafter, regardless of pipeline stalls. Wraps around on
+// overflow.
+`ifdef CFG_CYCLE_COUNTER_ENABLED
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        cc <= {`LM32_WORD_WIDTH{1'b0}};
+    else
+        cc <= cc + 1'b1;
+end
+`endif
+
+`ifdef CFG_BUS_ERRORS_ENABLED
+// Watch for data bus errors
+// data_bus_error_seen is a sticky flag: it is raised when D_ERR_I is seen
+// during a data bus cycle (D_CYC_O) and stays set until an exception is
+// taken in the M stage.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        data_bus_error_seen <= `FALSE;
+    else if (   (exception_m == `TRUE)
+             && (kill_m == `FALSE)
+            )
+        // Clear flag when exception is taken; clearing has priority over a
+        // bus error detected in the same cycle
+        data_bus_error_seen <= `FALSE;
+    else if (   (D_ERR_I == `TRUE)
+             && (D_CYC_O == `TRUE)
+            )
+        // Set flag when bus error is detected
+        data_bus_error_seen <= `TRUE;
+end
+`endif
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+// Register external break requests
+// ext_break_r is raised when ext_break is seen and held until a qualified
+// debug exception reaches the W stage (debug_exception_q_w).
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        ext_break_r <= `FALSE;
+    else if (debug_exception_q_w == `TRUE)
+        // Clearing has priority over a new request in the same cycle
+        ext_break_r <= `FALSE;
+    else if (ext_break == `TRUE)
+        ext_break_r <= `TRUE;
+end
+`endif
+
+// Valid bits to indicate whether an instruction in a particular pipeline stage is valid or not
+
+// valid_a: combinational valid bit for the address stage. One of three
+// variants is compiled in depending on which caches are enabled (none is
+// generated when neither cache is enabled). In each variant:
+//   - a cache refill request forces valid_a low
+//   - otherwise a cache restart request forces valid_a high
+//   - otherwise valid_a is high only while no enabled cache is refilling
+`ifdef CFG_ICACHE_ENABLED
+`ifdef CFG_DCACHE_ENABLED
+// Both instruction and data cache enabled
+always @(*)
+begin
+    if (   (icache_refill_request == `TRUE) 
+        || (dcache_refill_request == `TRUE)
+       )
+        valid_a = `FALSE;
+    else if (   (icache_restart_request == `TRUE) 
+             || (dcache_restart_request == `TRUE) 
+            ) 
+        valid_a = `TRUE;
+    else 
+        valid_a = !icache_refilling && !dcache_refilling;
+end 
+`else
+// Instruction cache only
+always @(*)
+begin
+    if (icache_refill_request == `TRUE) 
+        valid_a = `FALSE;
+    else if (icache_restart_request == `TRUE) 
+        valid_a = `TRUE;
+    else 
+        valid_a = !icache_refilling;
+end 
+`endif
+`else
+`ifdef CFG_DCACHE_ENABLED
+// Data cache only
+always @(*)
+begin
+    if (dcache_refill_request == `TRUE) 
+        valid_a = `FALSE;
+    else if (dcache_restart_request == `TRUE) 
+        valid_a = `TRUE;
+    else 
+        valid_a = !dcache_refilling;
+end 
+`endif
+`endif
+
+// Per-stage valid bits (F, D, X, M, W). All are cleared on reset. Each stage
+// takes the valid bit of the previous stage (masked by that stage's kill
+// signal) when the pipeline advances into it, and is cleared when it is
+// killed or when its instruction moves on without a successor arriving.
+// The order of the tests in each if/else chain sets the priority between
+// kill, advance and drain, and differs from stage to stage.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        valid_f <= `FALSE;
+        valid_d <= `FALSE;
+        valid_x <= `FALSE;
+        valid_m <= `FALSE;
+        valid_w <= `FALSE;
+    end
+    else
+    begin    
+        // F stage: refilled from the address stage when F is killed or A is
+        // not stalled; constant `TRUE when no cache is present
+        if ((kill_f == `TRUE) || (stall_a == `FALSE))
+`ifdef LM32_CACHE_ENABLED
+            valid_f <= valid_a;    
+`else
+            valid_f <= `TRUE;
+`endif            
+        else if (stall_f == `FALSE)
+            valid_f <= `FALSE;            
+
+        // D stage: kill has priority over advancing
+        if (kill_d == `TRUE)
+            valid_d <= `FALSE;
+        else if (stall_f == `FALSE)
+            valid_d <= valid_f & !kill_f;
+        else if (stall_d == `FALSE)
+            valid_d <= `FALSE;
+       
+        // X stage: advancing from D has priority over kill_x
+        if (stall_d == `FALSE)
+            valid_x <= valid_d & !kill_d;
+        else if (kill_x == `TRUE)
+            valid_x <= `FALSE;
+        else if (stall_x == `FALSE)
+            valid_x <= `FALSE;
+
+        // M stage: kill has priority over advancing
+        if (kill_m == `TRUE)
+            valid_m <= `FALSE;
+        else if (stall_x == `FALSE)
+            valid_m <= valid_x & !kill_x;
+        else if (stall_m == `FALSE)
+            valid_m <= `FALSE;
+
+        // W stage: never holds its value; it is invalid whenever M is stalled
+        if (stall_m == `FALSE)
+            valid_w <= valid_m & !kill_m;
+        else 
+            valid_w <= `FALSE;        
+    end
+end
+
+// Microcode pipeline registers
+//
+// Holds the decoded control signals and operands as they travel through the
+// pipeline:
+//   - D/X registers are loaded when the X stage is not stalled
+//   - X/M registers are loaded when the M stage is not stalled; when an
+//     exception is detected in X the instruction is converted in place:
+//     load and multiply result selects are cleared, write_enable_m is
+//     forced, and the destination register and branch target are replaced
+//     (see the comments on each of those assignments below)
+//   - M/W registers are loaded every cycle
+//
+// All registers written by this block are given a reset value, including
+// eret_m / bret_m and the CFG_TRACE_ENABLED registers, so that none of them
+// powers up undefined.
+//
+// NOTE(review): when CFG_FAST_UNCONDITIONAL_BRANCH is defined,
+// branch_predict_m and branch_predict_taken_m are never loaded from the X
+// stage and therefore keep their reset value; confirm this is intended
+// before enabling that option.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+`ifdef CFG_USER_ENABLED
+        user_opcode <= {`LM32_USER_OPCODE_WIDTH{1'b0}};       
+`endif        
+        operand_0_x <= {`LM32_WORD_WIDTH{1'b0}};
+        operand_1_x <= {`LM32_WORD_WIDTH{1'b0}};
+        store_operand_x <= {`LM32_WORD_WIDTH{1'b0}};
+        branch_target_x <= {`LM32_PC_WIDTH{1'b0}};        
+        x_result_sel_csr_x <= `FALSE;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+        x_result_sel_mc_arith_x <= `FALSE;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT    
+        x_result_sel_shift_x <= `FALSE;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+        x_result_sel_sext_x <= `FALSE;
+`endif    
+        x_result_sel_logic_x <= `FALSE;
+`ifdef CFG_USER_ENABLED
+        x_result_sel_user_x <= `FALSE;
+`endif
+        x_result_sel_add_x <= `FALSE;
+        m_result_sel_compare_x <= `FALSE;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+        m_result_sel_shift_x <= `FALSE;
+`endif    
+        w_result_sel_load_x <= `FALSE;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+        w_result_sel_mul_x <= `FALSE;
+`endif
+        x_bypass_enable_x <= `FALSE;
+        m_bypass_enable_x <= `FALSE;
+        write_enable_x <= `FALSE;
+        write_idx_x <= {`LM32_REG_IDX_WIDTH{1'b0}};
+        csr_x <= {`LM32_CSR_WIDTH{1'b0}};
+        load_x <= `FALSE;
+        store_x <= `FALSE;
+        size_x <= {`LM32_SIZE_WIDTH{1'b0}};
+        sign_extend_x <= `FALSE;
+        adder_op_x <= `FALSE;
+        adder_op_x_n <= `FALSE;
+        logic_op_x <= 4'h0;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+        direction_x <= `FALSE;
+`endif
+`ifdef CFG_ROTATE_ENABLED
+        rotate_x <= `FALSE;
+
+`endif
+        branch_x <= `FALSE;
+        branch_predict_x <= `FALSE;
+        branch_predict_taken_x <= `FALSE;
+        condition_x <= `LM32_CONDITION_U1;
+`ifdef CFG_DEBUG_ENABLED
+        break_x <= `FALSE;
+`endif
+        scall_x <= `FALSE;
+        eret_x <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        bret_x <= `FALSE;
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+        bus_error_x <= `FALSE;
+        data_bus_error_exception_m <= `FALSE;
+`endif
+        csr_write_enable_x <= `FALSE;
+        operand_m <= {`LM32_WORD_WIDTH{1'b0}};
+        branch_target_m <= {`LM32_PC_WIDTH{1'b0}};
+        m_result_sel_compare_m <= `FALSE;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+        m_result_sel_shift_m <= `FALSE;
+`endif    
+        w_result_sel_load_m <= `FALSE;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+        w_result_sel_mul_m <= `FALSE;
+`endif
+        m_bypass_enable_m <= `FALSE;
+        branch_m <= `FALSE;
+        branch_predict_m <= `FALSE;
+       branch_predict_taken_m <= `FALSE;
+        exception_m <= `FALSE;
+        load_m <= `FALSE;
+        store_m <= `FALSE;
+        write_enable_m <= `FALSE;            
+        write_idx_m <= {`LM32_REG_IDX_WIDTH{1'b0}};
+        condition_met_m <= `FALSE;
+        // eret_m / bret_m are loaded below but previously had no reset value
+        eret_m <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        bret_m <= `FALSE;
+`endif
+`ifdef CFG_DCACHE_ENABLED
+        dflush_m <= `FALSE;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+        debug_exception_m <= `FALSE;
+        non_debug_exception_m <= `FALSE;        
+`endif
+        operand_w <= {`LM32_WORD_WIDTH{1'b0}};        
+        w_result_sel_load_w <= `FALSE;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+        w_result_sel_mul_w <= `FALSE;
+`endif
+        write_idx_w <= {`LM32_REG_IDX_WIDTH{1'b0}};        
+        write_enable_w <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        debug_exception_w <= `FALSE;
+        non_debug_exception_w <= `FALSE;        
+`else
+        exception_w <= `FALSE;
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+        memop_pc_w <= {`LM32_PC_WIDTH{1'b0}};
+`endif
+`ifdef CFG_TRACE_ENABLED
+        // Trace registers are loaded below but previously had no reset value
+        eid_m <= `LM32_EID_RESET;
+        eid_w <= `LM32_EID_RESET;
+        eret_w <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        bret_w <= `FALSE; 
+`endif
+`endif
+    end
+    else
+    begin
+        // D/X stage registers
+       
+        if (stall_x == `FALSE)
+        begin
+`ifdef CFG_USER_ENABLED
+            user_opcode <= user_opcode_d;       
+`endif        
+            operand_0_x <= d_result_0;
+            operand_1_x <= d_result_1;
+            store_operand_x <= bypass_data_1;
+            // Register-indirect branches take their target from the bypassed
+            // register value instead of the decoded target
+            branch_target_x <= branch_reg_d == `TRUE ? bypass_data_0[`LM32_PC_RNG] : branch_target_d;            
+            x_result_sel_csr_x <= x_result_sel_csr_d;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+            x_result_sel_mc_arith_x <= x_result_sel_mc_arith_d;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT    
+            x_result_sel_shift_x <= x_result_sel_shift_d;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+            x_result_sel_sext_x <= x_result_sel_sext_d;
+`endif    
+            x_result_sel_logic_x <= x_result_sel_logic_d;
+`ifdef CFG_USER_ENABLED
+            x_result_sel_user_x <= x_result_sel_user_d;
+`endif
+            x_result_sel_add_x <= x_result_sel_add_d;
+            m_result_sel_compare_x <= m_result_sel_compare_d;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+            m_result_sel_shift_x <= m_result_sel_shift_d;
+`endif    
+            w_result_sel_load_x <= w_result_sel_load_d;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+            w_result_sel_mul_x <= w_result_sel_mul_d;
+`endif
+            x_bypass_enable_x <= x_bypass_enable_d;
+            m_bypass_enable_x <= m_bypass_enable_d;
+            load_x <= load_d;
+            store_x <= store_d;
+            branch_x <= branch_d;
+           branch_predict_x <= branch_predict_d;
+           branch_predict_taken_x <= branch_predict_taken_d;
+           write_idx_x <= write_idx_d;
+            csr_x <= csr_d;
+            size_x <= size_d;
+            sign_extend_x <= sign_extend_d;
+            adder_op_x <= adder_op_d;
+            // Inverted copy of adder_op_x, registered alongside it
+            adder_op_x_n <= ~adder_op_d;
+            logic_op_x <= logic_op_d;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+            direction_x <= direction_d;
+`endif
+`ifdef CFG_ROTATE_ENABLED
+            rotate_x <= rotate_d;
+`endif
+            condition_x <= condition_d;
+            csr_write_enable_x <= csr_write_enable_d;
+`ifdef CFG_DEBUG_ENABLED
+            break_x <= break_d;
+`endif
+            scall_x <= scall_d;
+`ifdef CFG_BUS_ERRORS_ENABLED
+            bus_error_x <= bus_error_d;
+`endif
+            eret_x <= eret_d;
+`ifdef CFG_DEBUG_ENABLED
+            bret_x <= bret_d; 
+`endif
+            write_enable_x <= write_enable_d;
+        end
+        
+        // X/M stage registers
+
+        if (stall_m == `FALSE)
+        begin
+            operand_m <= x_result;
+            m_result_sel_compare_m <= m_result_sel_compare_x;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+            m_result_sel_shift_m <= m_result_sel_shift_x;
+`endif    
+            // On an exception the W stage must write operand_w (the saved
+            // PC), so the load and multiply result selects are cleared
+            if (exception_x == `TRUE)
+            begin
+                w_result_sel_load_m <= `FALSE;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+                w_result_sel_mul_m <= `FALSE;
+`endif
+            end
+            else
+            begin
+                w_result_sel_load_m <= w_result_sel_load_x;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+                w_result_sel_mul_m <= w_result_sel_mul_x;
+`endif
+            end
+            m_bypass_enable_m <= m_bypass_enable_x;
+            load_m <= load_x;
+            store_m <= store_x;
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+            // A branch already taken in X must not be taken again in M
+            branch_m <= branch_x && !branch_taken_x;
+`else
+            branch_m <= branch_x;
+           branch_predict_m <= branch_predict_x;
+           branch_predict_taken_m <= branch_predict_taken_x;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+          // Data bus errors are generated by the wishbone and are
+          // made known to the processor only in next cycle (as a
+          // non-debug exception). A break instruction can be seen
+          // in same cycle (causing a debug exception). Handle non
+          // -debug exception first!
+            if (non_debug_exception_x == `TRUE) 
+                write_idx_m <= `LM32_EA_REG;
+            else if (debug_exception_x == `TRUE)
+                write_idx_m <= `LM32_BA_REG;
+            else 
+                write_idx_m <= write_idx_x;
+`else
+            if (exception_x == `TRUE)
+                write_idx_m <= `LM32_EA_REG;
+            else 
+                write_idx_m <= write_idx_x;
+`endif
+            condition_met_m <= condition_met_x;
+            // Exceptions branch to a vector formed from the base address
+            // (deba or eba), the exception ID and three zero bits
+`ifdef CFG_DEBUG_ENABLED
+          if (exception_x == `TRUE)
+            if ((dc_re == `TRUE)
+`ifdef CFG_ALTERNATE_EBA
+         || (at_debug == `TRUE)
+`endif
+
+                || ((debug_exception_x == `TRUE) 
+                    && (non_debug_exception_x == `FALSE)))
+              branch_target_m <= {deba, eid_x, {3{1'b0}}};
+            else
+              branch_target_m <= {eba, eid_x, {3{1'b0}}};
+          else
+            branch_target_m <= branch_target_x;
+`else
+            branch_target_m <= exception_x == `TRUE ? {eba, eid_x, {3{1'b0}}} : branch_target_x;
+`endif
+`ifdef CFG_TRACE_ENABLED
+            eid_m <= eid_x;
+`endif
+`ifdef CFG_DCACHE_ENABLED
+            dflush_m <= dflush_x;
+`endif
+            eret_m <= eret_q_x;
+`ifdef CFG_DEBUG_ENABLED
+            bret_m <= bret_q_x; 
+`endif
+            // An exception always writes a register (the saved PC)
+            write_enable_m <= exception_x == `TRUE ? `TRUE : write_enable_x;            
+`ifdef CFG_DEBUG_ENABLED
+            debug_exception_m <= debug_exception_x;
+            non_debug_exception_m <= non_debug_exception_x;        
+`endif
+        end
+        
+        // State changing regs
+        if (stall_m == `FALSE)
+        begin
+            // Only a valid, not killed, not stalled X-stage instruction may
+            // raise an exception in M
+            if ((exception_x == `TRUE) && (q_x == `TRUE) && (stall_x == `FALSE))
+                exception_m <= `TRUE;
+            else 
+                exception_m <= `FALSE;
+`ifdef CFG_BUS_ERRORS_ENABLED
+          data_bus_error_exception_m <=    (data_bus_error_exception == `TRUE) 
+`ifdef CFG_DEBUG_ENABLED
+                                        && (reset_exception == `FALSE)
+`endif
+                                        ;
+`endif
+       end
+                
+        // M/W stage registers
+        // On an exception the value written back is the byte address of the
+        // faulting instruction: pc_m, or memop_pc_w (the last memory
+        // operation) for a data bus error
+`ifdef CFG_BUS_ERRORS_ENABLED
+        operand_w <= exception_m == `TRUE ? (data_bus_error_exception_m ? {memop_pc_w, 2'b00} : {pc_m, 2'b00}) : m_result;
+`else
+        operand_w <= exception_m == `TRUE ? {pc_m, 2'b00} : m_result;
+`endif
+        w_result_sel_load_w <= w_result_sel_load_m;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+        w_result_sel_mul_w <= w_result_sel_mul_m;
+`endif
+        write_idx_w <= write_idx_m;
+`ifdef CFG_TRACE_ENABLED
+        eid_w <= eid_m;
+        eret_w <= eret_m;
+`ifdef CFG_DEBUG_ENABLED
+        bret_w <= bret_m; 
+`endif
+`endif
+        write_enable_w <= write_enable_m;
+`ifdef CFG_DEBUG_ENABLED
+        debug_exception_w <= debug_exception_m;
+        non_debug_exception_w <= non_debug_exception_m;
+`else
+        exception_w <= exception_m;
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+        // Remember the PC of the most recent load/store leaving M, but stop
+        // updating once a data bus error is pending
+        if (   (stall_m == `FALSE)
+            && (data_bus_error_exception == `FALSE)
+            && (   (load_q_m == `TRUE) 
+                || (store_q_m == `TRUE)
+               )
+          )
+          memop_pc_w <= pc_m;
+`endif
+    end
+end
+
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+// Hold the two register file read values in reg_data_buf_0/1 while the D stage
+// is stalled, and update a held value when the W stage writes that register
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        use_buf <= `FALSE;
+        reg_data_buf_0 <= {`LM32_WORD_WIDTH{1'b0}};
+        reg_data_buf_1 <= {`LM32_WORD_WIDTH{1'b0}};
+    end
+    else
+    begin
+        if (stall_d == `FALSE) // D stage not stalled: clear use_buf
+            use_buf <= `FALSE;
+        else if (use_buf == `FALSE) // first stalled cycle: capture the live read data once
+        begin        
+            reg_data_buf_0 <= reg_data_live_0;
+            reg_data_buf_1 <= reg_data_live_1;
+            use_buf <= `TRUE;
+        end        
+        if (reg_write_enable_q_w == `TRUE) // evaluated last, so a same-cycle write-back overrides the capture above
+        begin
+            if (write_idx_w == read_idx_0_d)
+                reg_data_buf_0 <= w_result;
+            if (write_idx_w == read_idx_1_d)
+                reg_data_buf_1 <= w_result;
+        end
+    end
+end
+`endif
+
+`ifdef LM32_EBR_REGISTER_FILE
+`else
+// Register file write port (only built when LM32_EBR_REGISTER_FILE is not defined)
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE) begin // reset: clear registers 0 to 31, each listed explicitly
+        registers[0] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[1] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[2] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[3] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[4] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[5] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[6] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[7] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[8] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[9] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[10] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[11] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[12] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[13] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[14] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[15] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[16] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[17] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[18] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[19] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[20] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[21] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[22] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[23] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[24] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[25] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[26] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[27] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[28] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[29] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[30] <= {`LM32_WORD_WIDTH{1'b0}};
+        registers[31] <= {`LM32_WORD_WIDTH{1'b0}};
+        end
+    else begin
+        if (reg_write_enable_q_w == `TRUE) // W-stage write-back: store w_result in the register selected by write_idx_w
+          registers[write_idx_w] <= w_result;
+        end
+end
+`endif
+
+`ifdef CFG_TRACE_ENABLED
+// PC tracing logic: reports exceptions and non-sequential committed PCs on the trace_* registers
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        trace_pc_valid <= `FALSE;
+        trace_pc <= {`LM32_PC_WIDTH{1'b0}};
+        trace_exception <= `FALSE;
+        trace_eid <= `LM32_EID_RESET;
+        trace_eret <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        trace_bret <= `FALSE;
+`endif
+        pc_c <= `CFG_EBA_RESET/4; // pc_c holds the PC of the last committed instruction (see below)
+    end
+    else
+    begin
+        trace_pc_valid <= `FALSE; // default for this cycle; overridden below when a PC is reported
+        // Has an exception occurred
+`ifdef CFG_DEBUG_ENABLED
+        if ((debug_exception_q_w == `TRUE) || (non_debug_exception_q_w == `TRUE))
+`else
+        if (exception_q_w == `TRUE)
+`endif
+        begin        
+            trace_exception <= `TRUE;
+            trace_pc_valid <= `TRUE;
+            trace_pc <= pc_w;
+            trace_eid <= eid_w;
+        end
+        else
+            trace_exception <= `FALSE;
+        
+        if ((valid_w == `TRUE) && (!kill_w))
+        begin
+            // An instruction is committing. Determine if it is non-sequential
+            if (pc_c + 1'b1 != pc_w)
+            begin
+                // Non-sequential instruction
+                trace_pc_valid <= `TRUE;
+                trace_pc <= pc_w;
+            end
+            // Record PC so we can determine if next instruction is sequential or not
+            pc_c <= pc_w;
+            // Indicate if it was an eret/bret instruction
+            trace_eret <= eret_w;
+`ifdef CFG_DEBUG_ENABLED
+            trace_bret <= bret_w;
+`endif
+        end
+        else
+        begin
+            trace_eret <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+            trace_bret <= `FALSE;
+`endif
+        end
+    end
+end
+`endif
+      
+endmodule 
diff --git a/verilog/lm32/lm32_dcache.v b/verilog/lm32/lm32_dcache.v
new file mode 100644 (file)
index 0000000..71e4c0b
--- /dev/null
@@ -0,0 +1,527 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_dcache.v
+// Title            : Data cache
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version         : 3.1
+//                  : Support for user-selected resource usage when implementing
+//                  : cache memory. Additional parameters must be defined when
+//                  : invoking lm32_ram.v
+// =============================================================================
+                                                                
+`include "lm32_include.v"
+
+`ifdef CFG_DCACHE_ENABLED
+// Address field bit ranges: word offset within a line, set index, tag, and the {set, offset} data index
+`define LM32_DC_ADDR_OFFSET_RNG          addr_offset_msb:addr_offset_lsb
+`define LM32_DC_ADDR_SET_RNG             addr_set_msb:addr_set_lsb
+`define LM32_DC_ADDR_TAG_RNG             addr_tag_msb:addr_tag_lsb
+`define LM32_DC_ADDR_IDX_RNG             addr_set_msb:addr_offset_lsb
+// Tag memory is addressed by set; data memory is addressed by {set, word offset}
+`define LM32_DC_TMEM_ADDR_WIDTH          addr_set_width
+`define LM32_DC_TMEM_ADDR_RNG            (`LM32_DC_TMEM_ADDR_WIDTH-1):0
+`define LM32_DC_DMEM_ADDR_WIDTH          (addr_offset_width+addr_set_width)
+`define LM32_DC_DMEM_ADDR_RNG            (`LM32_DC_DMEM_ADDR_WIDTH-1):0
+// Tag memory word layout: tag in the upper bits, valid flag in bit 0
+`define LM32_DC_TAGS_WIDTH               (addr_tag_width+1)
+`define LM32_DC_TAGS_RNG                 (`LM32_DC_TAGS_WIDTH-1):0
+`define LM32_DC_TAGS_TAG_RNG             (`LM32_DC_TAGS_WIDTH-1):1
+`define LM32_DC_TAGS_VALID_RNG           0
+// FSM state encoding: one bit per state (bit 0 flush, bit 1 check, bit 2 refill)
+`define LM32_DC_STATE_RNG                2:0
+`define LM32_DC_STATE_FLUSH              3'b001
+`define LM32_DC_STATE_CHECK              3'b010
+`define LM32_DC_STATE_REFILL             3'b100
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_dcache ( 
+    // ----- Inputs -----
+    clk_i,
+    rst_i,    
+    stall_a,
+    stall_x,
+    stall_m,
+    address_x,
+    address_m,
+    load_q_m,
+    store_q_m,
+    store_data,
+    store_byte_select,
+    refill_ready,
+    refill_data,
+    dflush,
+    // ----- Outputs -----
+    stall_request,
+    restart_request,
+    refill_request,
+    refill_address,
+    refilling,
+    load_data
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter associativity = 1;                            // Associativity of the cache (Number of ways)
+parameter sets = 512;                                   // Number of sets
+parameter bytes_per_line = 16;                          // Number of bytes per cache line
+parameter base_address = 0;                             // Base address of cachable memory
+parameter limit = 0;                                    // Limit (highest address) of cachable memory
+// Derived address geometry; clogb2 is presumably provided by the lm32_functions.v include - TODO confirm its exact rounding
+localparam addr_offset_width = clogb2(bytes_per_line)-1-2; // the -2 presumably drops the two byte-in-word bits (addr_offset_lsb is 2)
+localparam addr_set_width = clogb2(sets)-1;
+localparam addr_offset_lsb = 2;
+localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
+localparam addr_set_lsb = (addr_offset_msb+1);
+localparam addr_set_msb = (addr_set_lsb+addr_set_width-1);
+localparam addr_tag_lsb = (addr_set_msb+1);
+localparam addr_tag_msb = clogb2(`CFG_DCACHE_LIMIT-`CFG_DCACHE_BASE_ADDRESS)-1; // NOTE(review): uses the CFG_DCACHE_* macros; the base_address and limit parameters are not referenced in this module
+localparam addr_tag_width = (addr_tag_msb-addr_tag_lsb+1);
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                            // Clock
+input rst_i;                                            // Reset
+
+input stall_a;                                          // Stall A stage
+input stall_x;                                          // Stall X stage
+input stall_m;                                          // Stall M stage
+
+input [`LM32_WORD_RNG] address_x;                       // X stage load/store address
+input [`LM32_WORD_RNG] address_m;                       // M stage load/store address
+input load_q_m;                                         // Load instruction in M stage
+input store_q_m;                                        // Store instruction in M stage
+input [`LM32_WORD_RNG] store_data;                      // Data to store
+input [`LM32_BYTE_SELECT_RNG] store_byte_select;        // Which bytes in store data should be modified
+
+input refill_ready;                                     // Indicates next word of refill data is ready
+input [`LM32_WORD_RNG] refill_data;                     // Refill data
+
+input dflush;                                           // Indicates cache should be flushed
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output stall_request;                                   // Request pipeline be stalled because cache is busy
+wire   stall_request;
+output restart_request;                                 // Request to restart instruction that caused the cache miss
+reg    restart_request;
+output refill_request;                                  // Request a refill 
+reg    refill_request;
+output [`LM32_WORD_RNG] refill_address;                 // Address to refill from
+reg    [`LM32_WORD_RNG] refill_address;
+output refilling;                                       // Indicates if the cache is currently refilling
+reg    refilling;
+output [`LM32_WORD_RNG] load_data;                      // Data read from cache
+wire   [`LM32_WORD_RNG] load_data;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+wire read_port_enable;                                  // Cache memory read port clock enable
+wire write_port_enable;                                 // Cache memory write port clock enable
+wire [0:associativity-1] way_tmem_we;                   // Tag memory write enable
+wire [0:associativity-1] way_dmem_we;                   // Data memory write enable
+wire [`LM32_WORD_RNG] way_data[0:associativity-1];      // Data read from data memory
+wire [`LM32_DC_TAGS_TAG_RNG] way_tag[0:associativity-1];// Tag read from tag memory
+wire [0:associativity-1] way_valid;                     // Indicates which ways are valid
+wire [0:associativity-1] way_match;                     // Indicates which ways matched
+wire miss;                                              // Indicates no ways matched
+
+wire [`LM32_DC_TMEM_ADDR_RNG] tmem_read_address;        // Tag memory read address
+wire [`LM32_DC_TMEM_ADDR_RNG] tmem_write_address;       // Tag memory write address
+wire [`LM32_DC_DMEM_ADDR_RNG] dmem_read_address;        // Data memory read address
+wire [`LM32_DC_DMEM_ADDR_RNG] dmem_write_address;       // Data memory write address
+wire [`LM32_DC_TAGS_RNG] tmem_write_data;               // Tag memory write data        
+reg [`LM32_WORD_RNG] dmem_write_data;                   // Data memory write data
+
+reg [`LM32_DC_STATE_RNG] state;                         // Current state of FSM
+wire flushing;                                          // Indicates if cache is currently flushing
+wire check;                                             // Indicates if cache is currently checking for hits/misses
+wire refill;                                            // Indicates if cache is currently refilling
+
+wire valid_store;                                       // Indicates if there is a valid store instruction
+reg [associativity-1:0] refill_way_select;              // Which way should be refilled
+reg [`LM32_DC_ADDR_OFFSET_RNG] refill_offset;           // Which word in cache line should be refilled
+wire last_refill;                                       // Indicates when on last cycle of cache refill
+reg [`LM32_DC_TMEM_ADDR_RNG] flush_set;                 // Which set is currently being flushed
+
+genvar i, j;
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+   generate
+      for (i = 0; i < associativity; i = i + 1)    
+       begin : memories
+          // Way data: one 32-bit RAM when the data address is narrower than 11 bits, otherwise four byte-wide RAMs
+           if (`LM32_DC_DMEM_ADDR_WIDTH < 11)
+             begin : data_memories
+               lm32_ram 
+                 #(
+                   // ----- Parameters -------
+                   .data_width (32),
+                   .address_width (`LM32_DC_DMEM_ADDR_WIDTH)
+// Modified for Milkymist: removed non-portable RAM parameters
+                   ) way_0_data_ram 
+                   (
+                    // ----- Inputs -------
+                    .read_clk (clk_i),
+                    .write_clk (clk_i),
+                    .reset (rst_i),
+                    .read_address (dmem_read_address),
+                    .enable_read (read_port_enable),
+                    .write_address (dmem_write_address),
+                    .enable_write (write_port_enable),
+                    .write_enable (way_dmem_we[i]),
+                    .write_data (dmem_write_data),    
+                    // ----- Outputs -------
+                    .read_data (way_data[i])
+                    );    
+             end
+           else
+             begin
+               for (j = 0; j < 4; j = j + 1)    
+                 begin : byte_memories
+                    lm32_ram 
+                      #(
+                        // ----- Parameters -------
+                        .data_width (8),
+                        .address_width (`LM32_DC_DMEM_ADDR_WIDTH)
+// Modified for Milkymist: removed non-portable RAM parameters
+                        ) way_0_data_ram 
+                        (
+                         // ----- Inputs -------
+                         .read_clk (clk_i),
+                         .write_clk (clk_i),
+                         .reset (rst_i),
+                         .read_address (dmem_read_address),
+                         .enable_read (read_port_enable),
+                         .write_address (dmem_write_address),
+                         .enable_write (write_port_enable),
+                         .write_enable (way_dmem_we[i] & (store_byte_select[j] | refill)), // byte lane j is written for a selected store byte, or for every lane in the refill state
+                         .write_data (dmem_write_data[(j+1)*8-1:j*8]),    
+                         // ----- Outputs -------
+                         .read_data (way_data[i][(j+1)*8-1:j*8])
+                         );
+                 end
+             end
+          
+          // Way tags: each entry is read back as {tag, valid}
+          lm32_ram 
+            #(
+              // ----- Parameters -------
+              .data_width (`LM32_DC_TAGS_WIDTH),
+              .address_width (`LM32_DC_TMEM_ADDR_WIDTH)
+// Modified for Milkymist: removed non-portable RAM parameters
+              ) way_0_tag_ram 
+              (
+               // ----- Inputs -------
+               .read_clk (clk_i),
+               .write_clk (clk_i),
+               .reset (rst_i),
+               .read_address (tmem_read_address),
+               .enable_read (read_port_enable),
+               .write_address (tmem_write_address),
+               .enable_write (`TRUE), // tag write port is always enabled; writes are gated only by way_tmem_we
+               .write_enable (way_tmem_we[i]),
+               .write_data (tmem_write_data),
+               // ----- Outputs -------
+               .read_data ({way_tag[i], way_valid[i]})
+               );
+       end
+      
+   endgenerate
+
+/////////////////////////////////////////////////////
+// Combinational logic
+/////////////////////////////////////////////////////
+
+// A way matches when its stored tag equals the tag field of the M stage address and its valid bit is set
+generate
+    for (i = 0; i < associativity; i = i + 1)
+    begin : match
+assign way_match[i] = ({way_tag[i], way_valid[i]} == {address_m[`LM32_DC_ADDR_TAG_RNG], `TRUE});
+    end
+endgenerate
+
+// Select load data from the matching way; load_data is only driven for associativity 1 or 2
+generate
+    if (associativity == 1)    
+        begin : data_1
+assign load_data = way_data[0]; // single way: returned regardless of way_match, a miss is signalled separately
+    end
+    else if (associativity == 2)
+        begin : data_2
+assign load_data = way_match[0] ? way_data[0] : way_data[1]; 
+    end
+endgenerate
+
+generate
+    if (`LM32_DC_DMEM_ADDR_WIDTH < 11)
+    begin
+// Select data to write to data memories (single 32-bit RAM per way): refill data, or store bytes merged into the word on load_data
+always @(*)
+begin
+    if (refill == `TRUE)
+        dmem_write_data = refill_data;
+    else
+    begin
+        dmem_write_data[`LM32_BYTE_0_RNG] = store_byte_select[0] ? store_data[`LM32_BYTE_0_RNG] : load_data[`LM32_BYTE_0_RNG];
+        dmem_write_data[`LM32_BYTE_1_RNG] = store_byte_select[1] ? store_data[`LM32_BYTE_1_RNG] : load_data[`LM32_BYTE_1_RNG];
+        dmem_write_data[`LM32_BYTE_2_RNG] = store_byte_select[2] ? store_data[`LM32_BYTE_2_RNG] : load_data[`LM32_BYTE_2_RNG];
+        dmem_write_data[`LM32_BYTE_3_RNG] = store_byte_select[3] ? store_data[`LM32_BYTE_3_RNG] : load_data[`LM32_BYTE_3_RNG];
+    end
+end
+    end
+    else
+    begin
+// Select data to write to data memories - FIXME: Should use different write ports on dual port RAMs, but they don't work
+always @(*)
+begin
+    if (refill == `TRUE)
+        dmem_write_data = refill_data;
+    else
+        dmem_write_data = store_data; // whole store word passed through; byte selection is done by the per-byte RAM write enables
+end
+    end
+endgenerate
+
+// Data memory write address: {refill set, refill_offset} in the refill state, otherwise the index field of the M stage address
+generate 
+     if (bytes_per_line > 4)
+assign dmem_write_address = (refill == `TRUE) 
+                            ? {refill_address[`LM32_DC_ADDR_SET_RNG], refill_offset}
+                            : address_m[`LM32_DC_ADDR_IDX_RNG];
+    else
+assign dmem_write_address = (refill == `TRUE) 
+                            ? refill_address[`LM32_DC_ADDR_SET_RNG]
+                            : address_m[`LM32_DC_ADDR_IDX_RNG];
+endgenerate
+assign dmem_read_address = address_x[`LM32_DC_ADDR_IDX_RNG]; // reads are indexed by the X stage address
+// Tag memory addresses: writes go to flush_set while flushing, otherwise to the refill set; reads use the X stage set index
+assign tmem_write_address = (flushing == `TRUE)
+                            ? flush_set
+                            : refill_address[`LM32_DC_ADDR_SET_RNG];
+assign tmem_read_address = address_x[`LM32_DC_ADDR_SET_RNG];
+
+// Indicate when refill_offset points at the last word of the line (always true when bytes_per_line is 4 or less)
+generate 
+    if (bytes_per_line > 4)                            
+assign last_refill = refill_offset == {addr_offset_width{1'b1}};
+    else
+assign last_refill = `TRUE;
+endgenerate
+
+// Memory port enables: reads advance when the X stage is not stalled; writes when refill data is ready or the M stage is not stalled
+assign read_port_enable = (stall_x == `FALSE);
+assign write_port_enable = (refill_ready == `TRUE) || !stall_m;
+
+// A store is only valid while the FSM is in the check state
+assign valid_store = (store_q_m == `TRUE) && (check == `TRUE);
+
+// Write enables: data on a ready refill word or a valid store that matches the way; tags on a ready refill word or while flushing
+generate
+    if (associativity == 1) 
+    begin : we_1     
+assign way_dmem_we[0] = (refill_ready == `TRUE) || ((valid_store == `TRUE) && (way_match[0] == `TRUE));
+assign way_tmem_we[0] = (refill_ready == `TRUE) || (flushing == `TRUE);
+    end 
+    else 
+    begin : we_2
+assign way_dmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || ((valid_store == `TRUE) && (way_match[0] == `TRUE));
+assign way_dmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || ((valid_store == `TRUE) && (way_match[1] == `TRUE));
+assign way_tmem_we[0] = ((refill_ready == `TRUE) && (refill_way_select[0] == `TRUE)) || (flushing == `TRUE);
+assign way_tmem_we[1] = ((refill_ready == `TRUE) && (refill_way_select[1] == `TRUE)) || (flushing == `TRUE);
+    end
+endgenerate
+
+// Valid bit written to the tag memory: set on the last refill word (the expression also sets it for a valid store), always cleared while flushing
+assign tmem_write_data[`LM32_DC_TAGS_VALID_RNG] = ((last_refill == `TRUE) || (valid_store == `TRUE)) && (flushing == `FALSE);
+assign tmem_write_data[`LM32_DC_TAGS_TAG_RNG] = refill_address[`LM32_DC_ADDR_TAG_RNG];
+
+// Signals that indicate which state we are in
+assign flushing = state[0];
+assign check = state[1];
+assign refill = state[2];
+// A miss is a load in an unstalled M stage that matches no way; a stall is requested whenever the FSM is not in the check state
+assign miss = (~(|way_match)) && (load_q_m == `TRUE) && (stall_m == `FALSE);
+assign stall_request = (check == `FALSE);
+                      
+/////////////////////////////////////////////////////
+// Sequential logic
+/////////////////////////////////////////////////////
+
+// Way selected for replacement: bit 0 is set at reset and bits 0 and 1 are swapped on every refill request
+generate
+    if (associativity >= 2) 
+    begin : way_select      
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refill_way_select <= {{associativity-1{1'b0}}, 1'b1};
+    else
+    begin        
+        if (refill_request == `TRUE)
+            refill_way_select <= {refill_way_select[0], refill_way_select[1]}; // NOTE(review): the swap only covers bits [1:0], i.e. two ways
+    end
+end
+    end 
+endgenerate   
+
+// Registered copy of the refill state bit (refilling follows refill one clock later)
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refilling <= `FALSE;
+    else 
+        refilling <= refill;
+end
+
+// Data cache control FSM: FLUSH after reset, then CHECK, with REFILL entered on a miss
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        state <= `LM32_DC_STATE_FLUSH;
+        flush_set <= {`LM32_DC_TMEM_ADDR_WIDTH{1'b1}}; // flushing starts at the highest set index and counts down
+        refill_request <= `FALSE;
+        refill_address <= {`LM32_WORD_WIDTH{1'bx}}; // left as X at reset; loaded from address_m on a miss
+        restart_request <= `FALSE;
+    end
+    else 
+    begin
+        case (state)
+
+        // Flush the cache: step flush_set down by one each clock until set 0 has been visited
+        `LM32_DC_STATE_FLUSH:
+        begin
+            if (flush_set == {`LM32_DC_TMEM_ADDR_WIDTH{1'b0}})
+                state <= `LM32_DC_STATE_CHECK;
+            flush_set <= flush_set - 1'b1; // wraps back to all ones after set 0
+        end
+        
+        // Check for cache misses
+        `LM32_DC_STATE_CHECK:
+        begin
+            if (stall_a == `FALSE) // restart_request is held until the A stage is not stalled
+                restart_request <= `FALSE;
+            if (miss == `TRUE) // a miss takes priority over a flush request
+            begin
+                refill_request <= `TRUE;
+                refill_address <= address_m;
+                state <= `LM32_DC_STATE_REFILL;
+            end
+            else if (dflush == `TRUE)
+                state <= `LM32_DC_STATE_FLUSH;
+        end
+
+        // Refill a cache line
+        `LM32_DC_STATE_REFILL:
+        begin
+            refill_request <= `FALSE; // refill_request is therefore high for a single clock
+            if (refill_ready == `TRUE)
+            begin
+                if (last_refill == `TRUE)
+                begin
+                    restart_request <= `TRUE;
+                    state <= `LM32_DC_STATE_CHECK;
+                end
+            end
+        end
+        
+        endcase        
+    end
+end
+
+generate
+    if (bytes_per_line > 4)
+    begin
+// Refill offset: index of the word being refilled within the line; only generated when bytes_per_line is greater than 4
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refill_offset <= {addr_offset_width{1'b0}};
+    else 
+    begin
+        case (state)
+        
+        // Check for cache misses: restart from word 0 when a miss is detected
+        `LM32_DC_STATE_CHECK:
+        begin
+            if (miss == `TRUE)
+                refill_offset <= {addr_offset_width{1'b0}};
+        end
+
+        // Refill a cache line: advance by one word each time refill data is ready
+        `LM32_DC_STATE_REFILL:
+        begin
+            if (refill_ready == `TRUE)
+                refill_offset <= refill_offset + 1'b1;
+        end
+        
+        endcase        
+    end
+end
+    end
+endgenerate
+
+endmodule
+
+`endif
+
diff --git a/verilog/lm32/lm32_debug.v b/verilog/lm32/lm32_debug.v
new file mode 100644 (file)
index 0000000..90c8d20
--- /dev/null
@@ -0,0 +1,369 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_debug.v
+// Title            : Hardware debug registers and associated logic.
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// Version          : 3.2
+//                  : Fixed simulation bug which flares up when number of 
+//                  : watchpoints is zero.
+// =============================================================================
+
+`include "lm32_include.v"
+
+`ifdef CFG_DEBUG_ENABLED
+
+// States for single-step FSM (nominal sequence: IDLE -> WAIT_FOR_RET -> EXECUTE_ONE_INSN -> RAISE_BREAKPOINT -> RESTART -> IDLE)
+`define LM32_DEBUG_SS_STATE_RNG                 2:0
+`define LM32_DEBUG_SS_STATE_IDLE                3'b000
+`define LM32_DEBUG_SS_STATE_WAIT_FOR_RET        3'b001
+`define LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN    3'b010
+`define LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT    3'b011
+`define LM32_DEBUG_SS_STATE_RESTART             3'b100
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_debug (
+    // ----- Inputs -------
+    clk_i, 
+    rst_i,
+    pc_x,
+    load_x,
+    store_x,
+    load_store_address_x,
+    csr_write_enable_x,
+    csr_write_data,
+    csr_x,
+`ifdef CFG_HW_DEBUG_ENABLED
+    jtag_csr_write_enable,
+    jtag_csr_write_data,
+    jtag_csr,
+`endif
+`ifdef LM32_SINGLE_STEP_ENABLED
+    eret_q_x,
+    bret_q_x,
+    stall_x,
+    exception_x,
+    q_x,
+`ifdef CFG_DCACHE_ENABLED
+    dcache_refill_request,
+`endif
+`endif
+    // ----- Outputs -------
+`ifdef LM32_SINGLE_STEP_ENABLED
+    dc_ss,
+`endif
+    dc_re,
+    bp_match,
+    wp_match
+    );
+    
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter breakpoints = 0;                      // Number of breakpoint CSRs
+parameter watchpoints = 0;                      // Number of watchpoint CSRs
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                    // Clock
+input rst_i;                                    // Reset
+
+input [`LM32_PC_RNG] pc_x;                      // X stage PC
+input load_x;                                   // Load instruction in X stage
+input store_x;                                  // Store instruction in X stage
+input [`LM32_WORD_RNG] load_store_address_x;    // Load or store effective address
+input csr_write_enable_x;                       // wcsr instruction in X stage
+input [`LM32_WORD_RNG] csr_write_data;          // Data to write to CSR
+input [`LM32_CSR_RNG] csr_x;                    // Which CSR to write
+`ifdef CFG_HW_DEBUG_ENABLED
+input jtag_csr_write_enable;                    // JTAG interface CSR write enable
+input [`LM32_WORD_RNG] jtag_csr_write_data;     // Data to write to CSR
+input [`LM32_CSR_RNG] jtag_csr;                 // Which CSR to write
+`endif
+`ifdef LM32_SINGLE_STEP_ENABLED
+input eret_q_x;                                 // eret instruction in X stage
+input bret_q_x;                                 // bret instruction in X stage
+input stall_x;                                  // Instruction in X stage is stalled
+input exception_x;                              // An exception has occurred in X stage
+input q_x;                                      // Indicates the instruction in the X stage is qualified
+`ifdef CFG_DCACHE_ENABLED
+input dcache_refill_request;                    // Indicates data cache wants to be refilled
+`endif
+`endif
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+`ifdef LM32_SINGLE_STEP_ENABLED
+output dc_ss;                                   // Single-step enable (bit 0 of the DC CSR; cleared by the single-step FSM when the step exception is taken)
+reg    dc_ss;
+`endif
+output dc_re;                                   // Remap exceptions (bit 1 of the DC CSR)
+reg    dc_re;
+output bp_match;                                // Indicates a breakpoint has matched (or, with single-step enabled, that the single-step FSM is in its RAISE_BREAKPOINT state)
+wire   bp_match;        
+output wp_match;                                // Indicates a watchpoint has matched
+wire   wp_match;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+genvar i;                                       // Loop index for generate statements
+
+// Debug CSRs
+
+reg [`LM32_PC_RNG] bp_a[0:breakpoints-1];       // Instruction breakpoint address
+reg bp_e[0:breakpoints-1];                      // Instruction breakpoint enable
+wire [0:breakpoints-1]bp_match_n;               // Indicates if a h/w instruction breakpoint matched
+
+reg [`LM32_WPC_C_RNG] wpc_c[0:watchpoints-1];   // Watchpoint control: bit 0 enables matching on loads, bit 1 enables matching on stores
+reg [`LM32_WORD_RNG] wp[0:watchpoints-1];       // Watchpoint address
+wire [0:watchpoints]wp_match_n;               // Indicates if a h/w data watchpoint matched. Declared one bit wider than the number of watchpoints; only bits [0:watchpoints-1] are driven (by wp_comb)
+
+wire debug_csr_write_enable;                    // Debug CSR write enable (from either a wcsr instruction or an external debugger)
+wire [`LM32_WORD_RNG] debug_csr_write_data;     // Data to write to debug CSR
+wire [`LM32_CSR_RNG] debug_csr;                 // Debug CSR to write to
+
+`ifdef LM32_SINGLE_STEP_ENABLED
+// FIXME: Declaring this as a reg causes ModelSim 6.1.15b to crash, so use integer for now
+//reg [`LM32_DEBUG_SS_STATE_RNG] state;           // State of single-step FSM
+integer state;                                  // State of single-step FSM (holds one of the LM32_DEBUG_SS_STATE_* encodings)
+`endif
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+// Check for breakpoints: entry i matches when it is enabled and its address equals the X stage PC
+generate
+    for (i = 0; i < breakpoints; i = i + 1)
+    begin : bp_comb
+assign bp_match_n[i] = ((bp_a[i] == pc_x) && (bp_e[i] == `TRUE));
+    end
+endgenerate
+generate 
+`ifdef LM32_SINGLE_STEP_ENABLED
+    if (breakpoints > 0) 
+assign bp_match = (|bp_match_n) || (state == `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT); // Any h/w breakpoint hit, or the single-step FSM is raising its breakpoint
+    else
+assign bp_match = state == `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT; // No breakpoint CSRs: only the single-step FSM can assert bp_match
+`else
+    if (breakpoints > 0) 
+assign bp_match = |bp_match_n; // Any h/w breakpoint hit
+    else
+assign bp_match = `FALSE; // No breakpoint CSRs and no single-step support: never matches
+`endif
+endgenerate    
+               
+// Check for watchpoints: entry i matches when the X stage load/store address equals wp[i] and the access type is enabled in wpc_c[i]
+generate 
+    for (i = 0; i < watchpoints; i = i + 1)
+    begin : wp_comb
+assign wp_match_n[i] = (wp[i] == load_store_address_x) && ((load_x & wpc_c[i][0]) | (store_x & wpc_c[i][1])); // wpc_c bit 0 = match loads, bit 1 = match stores
+    end               
+endgenerate
+generate
+    if (watchpoints > 0) 
+assign wp_match = |wp_match_n[0:watchpoints-1]; // Reduce only the driven bits: wp_match_n is declared [0:watchpoints], so its last bit has no driver and would make the reduction X in 4-state simulation when nothing matches
+    else
+assign wp_match = `FALSE; // No watchpoint CSRs: never matches
+endgenerate
+                
+`ifdef CFG_HW_DEBUG_ENABLED                
+// Multiplex between wcsr instruction writes and debugger writes to the debug CSRs (the JTAG data and CSR index are used whenever jtag_csr_write_enable is asserted)
+assign debug_csr_write_enable = (csr_write_enable_x == `TRUE) || (jtag_csr_write_enable == `TRUE);
+assign debug_csr_write_data = jtag_csr_write_enable == `TRUE ? jtag_csr_write_data : csr_write_data;
+assign debug_csr = jtag_csr_write_enable == `TRUE ? jtag_csr : csr_x;
+`else
+assign debug_csr_write_enable = csr_write_enable_x; // No JTAG debugger: only wcsr instructions write the debug CSRs
+assign debug_csr_write_data = csr_write_data;
+assign debug_csr = csr_x;
+`endif
+
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// Breakpoint address and enable CSRs (one register pair per breakpoint, written through debug CSR BP0 + i)
+generate
+    for (i = 0; i < breakpoints; i = i + 1)
+    begin : bp_seq
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        bp_a[i] <= {`LM32_PC_WIDTH{1'bx}}; // Address is a don't care out of reset because the enable below is cleared
+        bp_e[i] <= `FALSE;
+    end
+    else
+    begin
+        if ((debug_csr_write_enable == `TRUE) && (debug_csr == `LM32_CSR_BP0 + i))
+        begin
+            bp_a[i] <= debug_csr_write_data[`LM32_PC_RNG]; // PC-range bits of the written word hold the breakpoint address
+            bp_e[i] <= debug_csr_write_data[0]; // Bit 0 of the written word is the enable
+        end
+    end
+end    
+    end
+endgenerate
+
+// Watchpoint address and control flags CSRs (addresses are written through debug CSR WP0 + i, control flags through the DC CSR)
+generate
+    for (i = 0; i < watchpoints; i = i + 1)
+    begin : wp_seq
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        wp[i] <= {`LM32_WORD_WIDTH{1'bx}}; // Address is a don't care out of reset because the control flags below are reset to disabled
+        wpc_c[i] <= `LM32_WPC_C_DISABLED;
+    end
+    else
+    begin
+        if (debug_csr_write_enable == `TRUE)
+        begin
+            if (debug_csr == `LM32_CSR_DC)
+                wpc_c[i] <= debug_csr_write_data[3+i*2:2+i*2]; // Watchpoint i takes its two control bits from DC bits [3+2i:2+2i]
+            if (debug_csr == `LM32_CSR_WP0 + i)
+                wp[i] <= debug_csr_write_data; // The full written word is the watchpoint address
+        end
+    end  
+end
+    end
+endgenerate
+
+// Remap exceptions control bit: cleared on reset, loaded from bit 1 of the data on every write to the DC CSR
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        dc_re <= `FALSE;
+    else
+    begin
+        if ((debug_csr_write_enable == `TRUE) && (debug_csr == `LM32_CSR_DC))
+            dc_re <= debug_csr_write_data[1];
+    end
+end    
+
+`ifdef LM32_SINGLE_STEP_ENABLED
+// Single-step control flag (dc_ss) and the FSM that sequences one single-stepped instruction
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        state <= `LM32_DEBUG_SS_STATE_IDLE;
+        dc_ss <= `FALSE;
+    end
+    else
+    begin
+        if ((debug_csr_write_enable == `TRUE) && (debug_csr == `LM32_CSR_DC))
+        begin
+            dc_ss <= debug_csr_write_data[0]; // Bit 0 of the DC CSR is the single-step enable
+            if (debug_csr_write_data[0] == `FALSE) 
+                state <= `LM32_DEBUG_SS_STATE_IDLE; // Clearing the enable aborts any step in progress
+            else 
+                state <= `LM32_DEBUG_SS_STATE_WAIT_FOR_RET; // Setting the enable arms the FSM
+        end
+        case (state) // Assignments below come later in the block, so they override the DC-write assignments above if both occur in the same cycle
+        `LM32_DEBUG_SS_STATE_WAIT_FOR_RET:
+        begin
+            // Wait for eret or bret instruction to be executed
+            if (   (   (eret_q_x == `TRUE)
+                    || (bret_q_x == `TRUE)
+                    )
+                && (stall_x == `FALSE)
+               )
+                state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN; 
+        end
+        `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN:
+        begin
+            // Wait for an instruction to be executed (qualified and not stalled in the X stage)
+            if ((q_x == `TRUE) && (stall_x == `FALSE))
+                state <= `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT;
+        end
+        `LM32_DEBUG_SS_STATE_RAISE_BREAKPOINT:
+        begin
+            // Wait for exception to be raised (bp_match is asserted for as long as the FSM is in this state)
+`ifdef CFG_DCACHE_ENABLED
+            if (dcache_refill_request == `TRUE)
+                state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN; // A data cache refill was requested: go back and wait for an instruction to execute again
+            else 
+`endif
+                 if ((exception_x == `TRUE) && (q_x == `TRUE) && (stall_x == `FALSE))
+            begin
+                dc_ss <= `FALSE; // The step is done: clear the single-step enable
+                state <= `LM32_DEBUG_SS_STATE_RESTART;
+            end
+        end
+        `LM32_DEBUG_SS_STATE_RESTART:
+        begin
+            // Watch to see if stepped instruction is restarted due to a cache miss
+`ifdef CFG_DCACHE_ENABLED
+            if (dcache_refill_request == `TRUE)
+                state <= `LM32_DEBUG_SS_STATE_EXECUTE_ONE_INSN;
+            else 
+`endif
+                state <= `LM32_DEBUG_SS_STATE_IDLE; // No refill requested: return to idle
+        end
+        endcase
+    end
+end
+`endif
+
+endmodule
+
+`endif
diff --git a/verilog/lm32/lm32_decoder.v b/verilog/lm32/lm32_decoder.v
new file mode 100644 (file)
index 0000000..eebe5c3
--- /dev/null
@@ -0,0 +1,604 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_decoder.v
+// Title            : Instruction decoder
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : Support for static branch prediction. Information about
+//                  : branch type is generated and passed on to the predictor.
+// Version          : 3.2
+//                  : No change
+// Version          : 3.3
+//                  : Renamed port names that conflict with keywords reserved
+//                  : in System-Verilog.
+// =============================================================================
+
+`include "lm32_include.v"
+
+// Index of opcode field in an instruction: the full 6-bit opcode, and its low 5 bits for opcodes whose MSB is a don't care
+`define LM32_OPCODE_RNG         31:26
+`define LM32_OP_RNG             30:26
+
+// Opcodes - Some are only listed as 5 bits as their MSB is a don't care (the decoder compares those against LM32_OP_RNG instead of LM32_OPCODE_RNG)
+`define LM32_OPCODE_ADD         5'b01101
+`define LM32_OPCODE_AND         5'b01000
+`define LM32_OPCODE_ANDHI       6'b011000
+`define LM32_OPCODE_B           6'b110000
+`define LM32_OPCODE_BI          6'b111000
+`define LM32_OPCODE_BE          6'b010001
+`define LM32_OPCODE_BG          6'b010010
+`define LM32_OPCODE_BGE         6'b010011
+`define LM32_OPCODE_BGEU        6'b010100
+`define LM32_OPCODE_BGU         6'b010101
+`define LM32_OPCODE_BNE         6'b010111
+`define LM32_OPCODE_CALL        6'b110110
+`define LM32_OPCODE_CALLI       6'b111110
+`define LM32_OPCODE_CMPE        5'b11001
+`define LM32_OPCODE_CMPG        5'b11010
+`define LM32_OPCODE_CMPGE       5'b11011
+`define LM32_OPCODE_CMPGEU      5'b11100
+`define LM32_OPCODE_CMPGU       5'b11101
+`define LM32_OPCODE_CMPNE       5'b11111
+`define LM32_OPCODE_DIVU        6'b100011
+`define LM32_OPCODE_LB          6'b000100
+`define LM32_OPCODE_LBU         6'b010000
+`define LM32_OPCODE_LH          6'b000111
+`define LM32_OPCODE_LHU         6'b001011
+`define LM32_OPCODE_LW          6'b001010
+`define LM32_OPCODE_MODU        6'b110001
+`define LM32_OPCODE_MUL         5'b00010
+`define LM32_OPCODE_NOR         5'b00001
+`define LM32_OPCODE_OR          5'b01110
+`define LM32_OPCODE_ORHI        6'b011110
+`define LM32_OPCODE_RAISE       6'b101011
+`define LM32_OPCODE_RCSR        6'b100100
+`define LM32_OPCODE_SB          6'b001100
+`define LM32_OPCODE_SEXTB       6'b101100
+`define LM32_OPCODE_SEXTH       6'b110111
+`define LM32_OPCODE_SH          6'b000011
+`define LM32_OPCODE_SL          5'b01111
+`define LM32_OPCODE_SR          5'b00101
+`define LM32_OPCODE_SRU         5'b00000
+`define LM32_OPCODE_SUB         6'b110010
+`define LM32_OPCODE_SW          6'b010110
+`define LM32_OPCODE_USER        6'b110011
+`define LM32_OPCODE_WCSR        6'b110100
+`define LM32_OPCODE_XNOR        5'b01001
+`define LM32_OPCODE_XOR         5'b00110
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_decoder (
+    // ----- Inputs -------
+    instruction,
+    // ----- Outputs -------
+    d_result_sel_0,
+    d_result_sel_1,        
+    x_result_sel_csr,
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+    x_result_sel_mc_arith,
+`endif    
+`ifdef LM32_NO_BARREL_SHIFT    
+    x_result_sel_shift,
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    x_result_sel_sext,
+`endif    
+    x_result_sel_logic,
+`ifdef CFG_USER_ENABLED
+    x_result_sel_user,
+`endif
+    x_result_sel_add,
+    m_result_sel_compare,
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    m_result_sel_shift,  
+`endif    
+    w_result_sel_load,
+`ifdef CFG_PL_MULTIPLY_ENABLED
+    w_result_sel_mul,
+`endif
+    x_bypass_enable,
+    m_bypass_enable,
+    read_enable_0,
+    read_idx_0,
+    read_enable_1,
+    read_idx_1,
+    write_enable,
+    write_idx,
+    immediate,
+    branch_offset,
+    load,
+    store,
+    size,
+    sign_extend,
+    adder_op,
+    logic_op,
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    direction,
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    shift_left,
+    shift_right,
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    multiply,
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+    divide,
+    modulus,
+`endif
+    branch,
+    branch_reg,
+    condition,
+    bi_conditional,
+    bi_unconditional,
+`ifdef CFG_DEBUG_ENABLED
+    break_opcode,
+`endif
+    scall,
+    eret,
+`ifdef CFG_DEBUG_ENABLED
+    bret,
+`endif
+`ifdef CFG_USER_ENABLED
+    user_opcode,
+`endif
+    csr_write_enable
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input [`LM32_INSTRUCTION_RNG] instruction;       // Instruction to decode
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0;   // D stage operand 0 select: next PC for calls, otherwise register 0
+reg    [`LM32_D_RESULT_SEL_0_RNG] d_result_sel_0;
+output [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1;   // D stage operand 1 select: zero for calls, else immediate or register 1
+reg    [`LM32_D_RESULT_SEL_1_RNG] d_result_sel_1;
+output x_result_sel_csr;                        // X stage result select: set for rcsr
+reg    x_result_sel_csr;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+output x_result_sel_mc_arith;                   // X stage result select: set for multi-cycle shift, divide/modulus and multiply instructions
+reg    x_result_sel_mc_arith;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT    
+output x_result_sel_shift;                      // X stage result select: set for sr/sru when there is no barrel shifter
+reg    x_result_sel_shift;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+output x_result_sel_sext;                       // X stage result select: set for sextb/sexth
+reg    x_result_sel_sext;
+`endif
+output x_result_sel_logic;                      // X stage result select: set for logical instructions
+reg    x_result_sel_logic;
+`ifdef CFG_USER_ENABLED
+output x_result_sel_user;                       // X stage result select: set for the user opcode
+reg    x_result_sel_user;
+`endif
+output x_result_sel_add;                        // X stage result select: set when no other X stage select applies
+reg    x_result_sel_add;
+output m_result_sel_compare;                    // M stage result select: set for compare instructions
+reg    m_result_sel_compare;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+output m_result_sel_shift;                      // M stage result select: set for shift instructions
+reg    m_result_sel_shift;
+`endif
+output w_result_sel_load;                       // W stage result select: set for load instructions
+reg    w_result_sel_load;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+output w_result_sel_mul;                        // W stage result select: set for mul
+reg    w_result_sel_mul;
+`endif
+output x_bypass_enable;                         // Set if result is valid at end of X stage
+wire   x_bypass_enable;
+output m_bypass_enable;                         // Set if result is valid at end of M stage
+wire   m_bypass_enable;
+output read_enable_0;                           // Register file read port 0 enable (clear for bi and calli)
+wire   read_enable_0;
+output [`LM32_REG_IDX_RNG] read_idx_0;          // Register file read port 0 index, instruction[25:21]
+wire   [`LM32_REG_IDX_RNG] read_idx_0;
+output read_enable_1;                           // Register file read port 1 enable (clear for bi, calli and loads)
+wire   read_enable_1;
+output [`LM32_REG_IDX_RNG] read_idx_1;          // Register file read port 1 index, instruction[20:16]
+wire   [`LM32_REG_IDX_RNG] read_idx_1;
+output write_enable;                            // Register file write enable (clear for branches, raise, stores and wcsr)
+wire   write_enable;
+output [`LM32_REG_IDX_RNG] write_idx;           // Register file write index: 29 for calls, else instruction[20:16] or instruction[15:11] depending on instruction[31]
+wire   [`LM32_REG_IDX_RNG] write_idx;
+output [`LM32_WORD_RNG] immediate;              // Immediate operand: high immediate for andhi/orhi, otherwise the zero or sign extended 16-bit immediate
+wire   [`LM32_WORD_RNG] immediate;
+output [`LM32_PC_RNG] branch_offset;            // Branch offset: call immediate when instruction[31] is set, otherwise the conditional branch immediate
+wire   [`LM32_PC_RNG] branch_offset;
+output load;                                    // Instruction is a load (lb, lbu, lh, lhu, lw)
+wire   load;
+output store;                                   // Instruction is a store (sb, sh, sw)
+wire   store;
+output [`LM32_SIZE_RNG] size;                   // Size of load/stores, instruction[27:26]
+wire   [`LM32_SIZE_RNG] size;
+output sign_extend;                             // Whether to sign or zero extend, instruction[28]
+wire   sign_extend;
+output adder_op;                                // Set to 1 to perform a subtraction (sub, compares and branches)
+wire   adder_op;
+output [`LM32_LOGIC_OP_RNG] logic_op;           // Logic operation (and, or, etc), instruction[29:26]
+wire   [`LM32_LOGIC_OP_RNG] logic_op;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+output direction;                               // Shift direction, instruction[29]
+wire   direction;
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+output shift_left;                              // Instruction is sl
+wire   shift_left;
+output shift_right;                             // Instruction is sr or sru
+wire   shift_right;
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+output multiply;                                // Instruction is mul
+wire   multiply;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+output divide;                                  // Instruction is divu
+wire   divide;
+output modulus;                                 // Instruction is modu
+wire   modulus;
+`endif
+output branch;                                  // Instruction is any branch or call
+wire   branch;
+output branch_reg;                              // Instruction is call or b (the non-immediate branch forms)
+wire   branch_reg;
+output [`LM32_CONDITION_RNG] condition;         // Branch/compare condition field, instruction[28:26]
+wire   [`LM32_CONDITION_RNG] condition;
+output bi_conditional;                          // Instruction is a conditional branch (be, bg, bge, bgeu, bgu, bne)
+wire bi_conditional;
+output bi_unconditional;                        // Instruction is bi
+wire bi_unconditional;
+`ifdef CFG_DEBUG_ENABLED
+output break_opcode;                            // raise instruction with instruction[2] clear
+wire   break_opcode;
+`endif
+output scall;                                   // raise instruction with instruction[2] set
+wire   scall;
+output eret;                                    // b instruction whose instruction[25:21] field is 30
+wire   eret;
+`ifdef CFG_DEBUG_ENABLED
+output bret;                                    // b instruction whose instruction[25:21] field is 31
+wire   bret;
+`endif
+`ifdef CFG_USER_ENABLED
+output [`LM32_USER_OPCODE_RNG] user_opcode;     // User opcode, instruction[10:0]
+wire   [`LM32_USER_OPCODE_RNG] user_opcode;
+`endif
+output csr_write_enable;                        // Instruction is wcsr
+wire   csr_write_enable;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+wire [`LM32_WORD_RNG] extended_immediate;       // Zero or sign extended immediate
+wire [`LM32_WORD_RNG] high_immediate;           // Immediate as high 16 bits
+wire [`LM32_WORD_RNG] call_immediate;           // Call immediate
+wire [`LM32_WORD_RNG] branch_immediate;         // Conditional branch immediate
+wire sign_extend_immediate;                     // Whether the immediate should be sign extended (`TRUE) or zero extended (`FALSE)
+wire select_high_immediate;                     // Whether to select the high immediate  
+wire select_call_immediate;                     // Whether to select the call immediate 
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Combinational logic
+/////////////////////////////////////////////////////
+
+// Determine opcode. The op_* nets are not declared anywhere in this module, so they rely on Verilog implicit net declaration (1-bit wires)
+assign op_add    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_ADD; // 5-bit compare: instruction[31] is ignored
+assign op_and    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_AND;
+assign op_andhi  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_ANDHI; // 6-bit compare: the full opcode field must match
+assign op_b      = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_B;
+assign op_bi     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BI;
+assign op_be     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BE;
+assign op_bg     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BG;
+assign op_bge    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGE;
+assign op_bgeu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGEU;
+assign op_bgu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BGU;
+assign op_bne    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_BNE;
+assign op_call   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_CALL;
+assign op_calli  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_CALLI;
+assign op_cmpe   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPE;
+assign op_cmpg   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPG;
+assign op_cmpge  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGE;
+assign op_cmpgeu = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGEU;
+assign op_cmpgu  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPGU;
+assign op_cmpne  = instruction[`LM32_OP_RNG] == `LM32_OPCODE_CMPNE;
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign op_divu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_DIVU;
+`endif
+assign op_lb     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LB;
+assign op_lbu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LBU;
+assign op_lh     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LH;
+assign op_lhu    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LHU;
+assign op_lw     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_LW;
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign op_modu   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_MODU;
+`endif
+`ifdef LM32_MULTIPLY_ENABLED
+assign op_mul    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_MUL;
+`endif
+assign op_nor    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_NOR;
+assign op_or     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_OR;
+assign op_orhi   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_ORHI;
+assign op_raise  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_RAISE;
+assign op_rcsr   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_RCSR;
+assign op_sb     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SB;
+`ifdef CFG_SIGN_EXTEND_ENABLED
+assign op_sextb  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SEXTB;
+assign op_sexth  = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SEXTH;
+`endif
+assign op_sh     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SH;
+`ifdef LM32_BARREL_SHIFT_ENABLED
+assign op_sl     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SL;      
+`endif
+assign op_sr     = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SR;
+assign op_sru    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_SRU;
+assign op_sub    = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SUB;
+assign op_sw     = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_SW;
+assign op_user   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_USER;
+assign op_wcsr   = instruction[`LM32_OPCODE_RNG] == `LM32_OPCODE_WCSR;
+assign op_xnor   = instruction[`LM32_OP_RNG] == `LM32_OPCODE_XNOR;
+assign op_xor    = instruction[`LM32_OP_RNG] == `LM32_OPCODE_XOR;
+
+// Group opcodes by function (arith, logical, cmp, bra, call, shift and sext are implicit nets; the remaining targets are module outputs)
+assign arith = op_add | op_sub;
+assign logical = op_and | op_andhi | op_nor | op_or | op_orhi | op_xor | op_xnor;
+assign cmp = op_cmpe | op_cmpg | op_cmpge | op_cmpgeu | op_cmpgu | op_cmpne;
+assign bi_conditional = op_be | op_bg | op_bge | op_bgeu  | op_bgu | op_bne;
+assign bi_unconditional = op_bi;
+assign bra = op_b | bi_unconditional | bi_conditional; // Every branch that is not a call
+assign call = op_call | op_calli;
+`ifdef LM32_BARREL_SHIFT_ENABLED
+assign shift = op_sl | op_sr | op_sru;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+assign shift = op_sr | op_sru; // sl is not decoded as a shift in this configuration
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+assign shift_left = op_sl; // NOTE(review): op_sl is only assigned under LM32_BARREL_SHIFT_ENABLED; presumably lm32_include.v defines that whenever this macro is set - confirm
+assign shift_right = op_sr | op_sru;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+assign sext = op_sextb | op_sexth;
+`endif
+`ifdef LM32_MULTIPLY_ENABLED
+assign multiply = op_mul; // multiply is a port only under CFG_MC_MULTIPLY_ENABLED; otherwise this drives an implicit net
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+assign divide = op_divu; 
+assign modulus = op_modu;
+`endif
+assign load = op_lb | op_lbu | op_lh | op_lhu | op_lw;
+assign store = op_sb | op_sh | op_sw;
+
+// Select pipeline multiplexor controls (combinational; every reg below is assigned on every evaluation)
+always @(*)
+begin
+    // D stage: calls use (next PC, zero) as operands; otherwise operand 1 is the immediate when instruction[31] is 0 and the instruction is not a branch
+    if (call) 
+        d_result_sel_0 = `LM32_D_RESULT_SEL_0_NEXT_PC;
+    else 
+        d_result_sel_0 = `LM32_D_RESULT_SEL_0_REG_0;
+    if (call) 
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_ZERO;         
+    else if ((instruction[31] == 1'b0) && !bra) 
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_IMMEDIATE;
+    else
+        d_result_sel_1 = `LM32_D_RESULT_SEL_1_REG_1; 
+    // X stage: clear every select first, then the priority chain below sets exactly one of them
+    x_result_sel_csr = `FALSE;
+`ifdef LM32_MC_ARITHMETIC_ENABLED
+    x_result_sel_mc_arith = `FALSE;
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+    x_result_sel_shift = `FALSE;
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    x_result_sel_sext = `FALSE;
+`endif
+    x_result_sel_logic = `FALSE;
+`ifdef CFG_USER_ENABLED        
+    x_result_sel_user = `FALSE;
+`endif
+    x_result_sel_add = `FALSE;
+    if (op_rcsr)
+        x_result_sel_csr = `TRUE;
+`ifdef LM32_MC_ARITHMETIC_ENABLED    
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    else if (shift_left | shift_right) 
+        x_result_sel_mc_arith = `TRUE;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+    else if (divide | modulus)
+        x_result_sel_mc_arith = `TRUE;        
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    else if (multiply)
+        x_result_sel_mc_arith = `TRUE;            
+`endif
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+    else if (shift)
+        x_result_sel_shift = `TRUE;        
+`endif
+`ifdef CFG_SIGN_EXTEND_ENABLED
+    else if (sext)
+        x_result_sel_sext = `TRUE;
+`endif        
+    else if (logical) 
+        x_result_sel_logic = `TRUE;
+`ifdef CFG_USER_ENABLED        
+    else if (op_user)
+        x_result_sel_user = `TRUE;
+`endif
+    else // No other select applies: default to the adder result
+        x_result_sel_add = `TRUE;        
+    
+    // M stage: compare results and (with the pipelined barrel shifter) shift results
+
+    m_result_sel_compare = cmp;
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+    m_result_sel_shift = shift;
+`endif
+
+    // W stage: load data and (with the pipelined multiplier) multiply results
+    w_result_sel_load = load;
+`ifdef CFG_PL_MULTIPLY_ENABLED
+    w_result_sel_mul = op_mul; 
+`endif
+end
+
+// Set if result is valid at end of X stage (arithmetic, logical, rcsr and, where configured, multi-cycle, non-barrel shift, sign-extend and user instructions)
+assign x_bypass_enable =  arith 
+                        | logical
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+                        | shift_left
+                        | shift_right
+`endif                        
+`ifdef CFG_MC_MULTIPLY_ENABLED
+                        | multiply
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+                        | divide
+                        | modulus
+`endif
+`ifdef LM32_NO_BARREL_SHIFT
+                        | shift
+`endif                  
+`ifdef CFG_SIGN_EXTEND_ENABLED
+                        | sext 
+`endif                        
+`ifdef CFG_USER_ENABLED
+                        | op_user
+`endif
+                        | op_rcsr
+                        ;
+// Set if result is valid at end of M stage (everything valid at end of X, plus compares and pipelined shifts)
+assign m_bypass_enable = x_bypass_enable 
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+                        | shift
+`endif
+                        | cmp
+                        ;
+// Register file read port 0 (not enabled for bi and calli)
+assign read_enable_0 = ~(op_bi | op_calli);
+assign read_idx_0 = instruction[25:21];
+// Register file read port 1 (not enabled for bi, calli and loads)
+assign read_enable_1 = ~(op_bi | op_calli | load);
+assign read_idx_1 = instruction[20:16];
+// Register file write port (no write for branches, raise, stores and wcsr; calls write register 29)
+assign write_enable = ~(bra | op_raise | store | op_wcsr);
+assign write_idx = call
+                    ? 5'd29
+                    : instruction[31] == 1'b0 
+                        ? instruction[20:16] 
+                        : instruction[15:11];
+                        
+// Size of load/stores
+assign size = instruction[27:26];
+// Whether to sign or zero extend
+assign sign_extend = instruction[28];                      
+// Set adder_op to 1 to perform a subtraction (sub, all compares and all non-call branches)
+assign adder_op = op_sub | op_cmpe | op_cmpg | op_cmpge | op_cmpgeu | op_cmpgu | op_cmpne | bra;
+// Logic operation (and, or, etc)
+assign logic_op = instruction[29:26];
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+// Shift direction
+assign direction = instruction[29];
+`endif
+// Control flow microcodes
+assign branch = bra | call;
+assign branch_reg = op_call | op_b;
+assign condition = instruction[28:26];      
+`ifdef CFG_DEBUG_ENABLED
+assign break_opcode = op_raise & ~instruction[2]; // raise with instruction[2] clear is a break
+`endif
+assign scall = op_raise & instruction[2]; // raise with instruction[2] set is a system call
+assign eret = op_b & (instruction[25:21] == 5'd30); // b through register index 30
+`ifdef CFG_DEBUG_ENABLED
+assign bret = op_b & (instruction[25:21] == 5'd31); // b through register index 31
+`endif
+`ifdef CFG_USER_ENABLED
+// Extract user opcode
+assign user_opcode = instruction[10:0];
+`endif
+// CSR write enable (wcsr); rcsr is signalled separately through x_result_sel_csr
+assign csr_write_enable = op_wcsr;
+
+// Extract immediate from instruction
+// The 16-bit immediate is zero extended for and/or/nor/xor/xnor and the unsigned compares, sign extended otherwise
+assign sign_extend_immediate = ~(op_and | op_cmpgeu | op_cmpgu | op_nor | op_or | op_xnor | op_xor);
+assign select_high_immediate = op_andhi | op_orhi;
+assign select_call_immediate = instruction[31]; // instruction[31] set selects the 26-bit immediate form
+
+assign high_immediate = {instruction[15:0], 16'h0000}; // 16-bit immediate placed in the upper half of the word
+assign extended_immediate = {{16{sign_extend_immediate & instruction[15]}}, instruction[15:0]};
+assign call_immediate = {{6{instruction[25]}}, instruction[25:0]}; // 26-bit immediate, sign extended to 32 bits
+assign branch_immediate = {{16{instruction[15]}}, instruction[15:0]}; // 16-bit immediate, sign extended to 32 bits
+
+assign immediate = select_high_immediate == `TRUE 
+                        ? high_immediate 
+                        : extended_immediate;
+   
+assign branch_offset = select_call_immediate == `TRUE   
+                        ? call_immediate
+                        : branch_immediate;
+    
+endmodule 
+
diff --git a/verilog/lm32/lm32_dp_ram.v b/verilog/lm32/lm32_dp_ram.v
new file mode 100644 (file)
index 0000000..1d7f4f1
--- /dev/null
@@ -0,0 +1,35 @@
+module lm32_dp_ram( // Dual-port RAM: one clocked write port, one read port with a registered address
+       clk_i,
+       rst_i,
+       we_i,
+       waddr_i,
+       wdata_i,
+       raddr_i,
+       rdata_o);
+
+parameter addr_width = 32; // Width in bits of the read and write address ports
+parameter addr_depth = 1024; // Number of words in the memory array
+parameter data_width = 8; // Width in bits of each memory word
+
+input clk_i; // Clock; the write and the read-address register both use its rising edge
+input rst_i; // Reset input; not referenced anywhere in this module
+input we_i; // Write enable
+input [addr_width-1:0] waddr_i; // Write address
+input [data_width-1:0] wdata_i; // Data stored at waddr_i when we_i is asserted
+input [addr_width-1:0] raddr_i; // Read address, sampled on the rising clock edge
+output [data_width-1:0] rdata_o; // Word at the most recently sampled read address
+
+reg [data_width-1:0] ram[addr_depth-1:0]; // Storage array
+
+reg [addr_width-1:0] raddr_r; // Registered copy of raddr_i
+assign rdata_o = ram[raddr_r]; // Read through the registered address, so data follows raddr_i by one clock edge
+
+always @ (posedge clk_i)
+begin
+       if (we_i)
+               ram[waddr_i] <= wdata_i;
+       raddr_r <= raddr_i; // Updated on every rising edge, independent of we_i
+end
+
+endmodule
+
diff --git a/verilog/lm32/lm32_functions.v b/verilog/lm32/lm32_functions.v
new file mode 100644 (file)
index 0000000..1332a6e
--- /dev/null
@@ -0,0 +1,70 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project      : LatticeMico32
+// File         : lm32_functions.v
+// Title        : Common functions
+// Version      : 6.1.17
+//              : Initial Release
+// Version      : 7.0SP2, 3.0
+//              : No Change
+// Version      : 3.5
+//              : Added function to generate log-of-two that rounds-up to
+//              : power-of-two
+// =============================================================================
+                                         
+function integer clogb2; // Bits needed to represent value: 0 for 0, otherwise floor(log2(value))+1 (e.g. 16 -> 5)
+input [31:0] value; // Operand; also shifted in place by the loop below
+begin
+   for (clogb2 = 0; value > 0; clogb2 = clogb2 + 1) // Count the right shifts needed to reduce value to zero
+        value = value >> 1;
+end
+endfunction 
+
+function integer clogb2_v1; // For value >= 1: smallest exponent e such that (1<<e) >= value (e.g. 3 -> 2, 4 -> 2)
+input [31:0] value; // NOTE(review): value == 0 skips the loop, so the result is i-1 with i == 0 — confirm callers never pass 0
+reg   [31:0] i; // Loop counter; after the loop it is one more than the exponent last tested
+reg   [31:0] temp; // Most recently computed power of two
+begin
+   temp = 0;
+   i    = 0;
+   for (i = 0; temp < value; i = i + 1)  // NOTE(review): looks like this cannot terminate for value > 2**31 — confirm
+       temp = 1<<i;
+   clogb2_v1 = i-1; // Undo the increment applied after the last power of two was computed
+end
+endfunction
+
diff --git a/verilog/lm32/lm32_icache.v b/verilog/lm32/lm32_icache.v
new file mode 100644 (file)
index 0000000..9f1a759
--- /dev/null
@@ -0,0 +1,481 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_icache.v
+// Title            : Instruction cache
+// Dependencies     : lm32_include.v
+// 
+// Version 3.5
+// 1. Bug Fix: Instruction cache flushes issued from Instruction Inline Memory
+//    cause segmentation fault due to incorrect fetches.
+//
+// Version 3.1
+// 1. Feature: Support for user-selected resource usage when implementing
+//    cache memory. Additional parameters must be defined when invoking module
+//    lm32_ram. Instruction cache miss mechanism is dependent on branch
+//    prediction being performed in D stage of pipeline.
+//
+// Version 7.0SP2, 3.0
+// No change
+// =============================================================================
+                                         
+`include "lm32_include.v"
+
+`ifdef CFG_ICACHE_ENABLED
+
+`define LM32_IC_ADDR_OFFSET_RNG          addr_offset_msb:addr_offset_lsb
+`define LM32_IC_ADDR_SET_RNG             addr_set_msb:addr_set_lsb
+`define LM32_IC_ADDR_TAG_RNG             addr_tag_msb:addr_tag_lsb
+`define LM32_IC_ADDR_IDX_RNG             addr_set_msb:addr_offset_lsb
+
+`define LM32_IC_TMEM_ADDR_WIDTH          addr_set_width
+`define LM32_IC_TMEM_ADDR_RNG            (`LM32_IC_TMEM_ADDR_WIDTH-1):0
+`define LM32_IC_DMEM_ADDR_WIDTH          (addr_offset_width+addr_set_width)
+`define LM32_IC_DMEM_ADDR_RNG            (`LM32_IC_DMEM_ADDR_WIDTH-1):0
+
+`define LM32_IC_TAGS_WIDTH               (addr_tag_width+1)
+`define LM32_IC_TAGS_RNG                 (`LM32_IC_TAGS_WIDTH-1):0
+`define LM32_IC_TAGS_TAG_RNG             (`LM32_IC_TAGS_WIDTH-1):1
+`define LM32_IC_TAGS_VALID_RNG           0
+
+`define LM32_IC_STATE_RNG                3:0
+`define LM32_IC_STATE_FLUSH_INIT         4'b0001
+`define LM32_IC_STATE_FLUSH              4'b0010
+`define LM32_IC_STATE_CHECK              4'b0100
+`define LM32_IC_STATE_REFILL             4'b1000
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_icache ( 
+    // ----- Inputs -----
+    clk_i,
+    rst_i,    
+    stall_a,
+    stall_f,
+    address_a,
+    address_f,
+    read_enable_f,
+    refill_ready,
+    refill_data,
+    iflush,
+`ifdef CFG_IROM_ENABLED
+    select_f,
+`endif
+    valid_d,
+    branch_predict_taken_d,
+    // ----- Outputs -----
+    stall_request,
+    restart_request,
+    refill_request,
+    refill_address,
+    refilling,
+    inst
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter associativity = 1;                            // Associativity of the cache (Number of ways)
+parameter sets = 512;                                   // Number of sets
+parameter bytes_per_line = 16;                          // Number of bytes per cache line
+parameter base_address = 0;                             // Base address of cachable memory
+parameter limit = 0;                                    // Limit (highest address) of cachable memory
+
+localparam addr_offset_width = clogb2(bytes_per_line)-1-2;
+localparam addr_set_width = clogb2(sets)-1;
+localparam addr_offset_lsb = 2;
+localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
+localparam addr_set_lsb = (addr_offset_msb+1);
+localparam addr_set_msb = (addr_set_lsb+addr_set_width-1);
+localparam addr_tag_lsb = (addr_set_msb+1);
+localparam addr_tag_msb = clogb2(`CFG_ICACHE_LIMIT-`CFG_ICACHE_BASE_ADDRESS)-1;
+localparam addr_tag_width = (addr_tag_msb-addr_tag_lsb+1);
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                        // Clock 
+input rst_i;                                        // Reset
+
+input stall_a;                                      // Stall instruction in A stage
+input stall_f;                                      // Stall instruction in F stage
+
+input valid_d;                                      // Valid instruction in D stage
+input branch_predict_taken_d;                       // Instruction in D stage is a branch and is predicted taken
+   
+input [`LM32_PC_RNG] address_a;                     // Address of instruction in A stage
+input [`LM32_PC_RNG] address_f;                     // Address of instruction in F stage
+input read_enable_f;                                // Indicates if cache access is valid
+
+input refill_ready;                                 // Next word of refill data is ready
+input [`LM32_INSTRUCTION_RNG] refill_data;          // Data to refill the cache with
+
+input iflush;                                       // Flush the cache
+`ifdef CFG_IROM_ENABLED
+input select_f;                                     // Instruction in F stage is mapped through instruction cache
+`endif
+   
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output stall_request;                               // Request to stall the pipeline
+wire   stall_request;
+output restart_request;                             // Request to restart instruction that caused the cache miss
+reg    restart_request;
+output refill_request;                              // Request to refill a cache line
+wire   refill_request;
+output [`LM32_PC_RNG] refill_address;               // Base address of cache refill
+reg    [`LM32_PC_RNG] refill_address;               
+output refilling;                                   // Indicates the instruction cache is currently refilling
+reg    refilling;
+output [`LM32_INSTRUCTION_RNG] inst;                // Instruction read from cache
+wire   [`LM32_INSTRUCTION_RNG] inst;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+wire enable;
+wire [0:associativity-1] way_mem_we;
+wire [`LM32_INSTRUCTION_RNG] way_data[0:associativity-1];
+wire [`LM32_IC_TAGS_TAG_RNG] way_tag[0:associativity-1];
+wire [0:associativity-1] way_valid;
+wire [0:associativity-1] way_match;
+wire miss;
+
+wire [`LM32_IC_TMEM_ADDR_RNG] tmem_read_address;
+wire [`LM32_IC_TMEM_ADDR_RNG] tmem_write_address;
+wire [`LM32_IC_DMEM_ADDR_RNG] dmem_read_address;
+wire [`LM32_IC_DMEM_ADDR_RNG] dmem_write_address;
+wire [`LM32_IC_TAGS_RNG] tmem_write_data;
+
+reg [`LM32_IC_STATE_RNG] state;
+wire flushing;
+wire check;
+wire refill;
+
+reg [associativity-1:0] refill_way_select;
+reg [`LM32_IC_ADDR_OFFSET_RNG] refill_offset;
+wire last_refill;
+reg [`LM32_IC_TMEM_ADDR_RNG] flush_set;
+
+genvar i;
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+   generate
+      for (i = 0; i < associativity; i = i + 1)
+       begin : memories
+          
+          lm32_ram 
+            #(
+              // ----- Parameters -------
+              .data_width                 (32),
+              .address_width              (`LM32_IC_DMEM_ADDR_WIDTH)
+// Modified for Milkymist: removed non-portable RAM parameters
+) 
+          way_0_data_ram 
+            (
+             // ----- Inputs -------
+             .read_clk                   (clk_i),
+             .write_clk                  (clk_i),
+             .reset                      (rst_i),
+             .read_address               (dmem_read_address),
+             .enable_read                (enable),
+             .write_address              (dmem_write_address),
+             .enable_write               (`TRUE),
+             .write_enable               (way_mem_we[i]),
+             .write_data                 (refill_data),    
+             // ----- Outputs -------
+             .read_data                  (way_data[i])
+             );
+          
+          lm32_ram 
+            #(
+              // ----- Parameters -------
+              .data_width                 (`LM32_IC_TAGS_WIDTH),
+              .address_width              (`LM32_IC_TMEM_ADDR_WIDTH)
+// Modified for Milkymist: removed non-portable RAM parameters
+              ) 
+          way_0_tag_ram 
+            (
+             // ----- Inputs -------
+             .read_clk                   (clk_i),
+             .write_clk                  (clk_i),
+             .reset                      (rst_i),
+             .read_address               (tmem_read_address),
+             .enable_read                (enable),
+             .write_address              (tmem_write_address),
+             .enable_write               (`TRUE),
+             .write_enable               (way_mem_we[i] | flushing),
+             .write_data                 (tmem_write_data),
+             // ----- Outputs -------
+             .read_data                  ({way_tag[i], way_valid[i]})
+             );
+          
+       end
+endgenerate
+
+/////////////////////////////////////////////////////
+// Combinational logic
+/////////////////////////////////////////////////////
+
+// Compute which ways in the cache match the address being read
+generate
+    for (i = 0; i < associativity; i = i + 1)
+    begin : match
+assign way_match[i] = ({way_tag[i], way_valid[i]} == {address_f[`LM32_IC_ADDR_TAG_RNG], `TRUE});
+    end
+endgenerate
+
+// Select data from way that matched the address being read     
+generate
+    if (associativity == 1)
+    begin : inst_1
+assign inst = way_match[0] ? way_data[0] : 32'b0;
+    end
+    else if (associativity == 2)
+        begin : inst_2
+assign inst = way_match[0] ? way_data[0] : (way_match[1] ? way_data[1] : 32'b0);
+    end
+endgenerate
+
+// Compute address to use to index into the data memories
+generate 
+    if (bytes_per_line > 4)
+assign dmem_write_address = {refill_address[`LM32_IC_ADDR_SET_RNG], refill_offset};
+    else
+assign dmem_write_address = refill_address[`LM32_IC_ADDR_SET_RNG];
+endgenerate
+    
+assign dmem_read_address = address_a[`LM32_IC_ADDR_IDX_RNG];
+
+// Compute address to use to index into the tag memories                        
+assign tmem_read_address = address_a[`LM32_IC_ADDR_SET_RNG];
+assign tmem_write_address = flushing 
+                                ? flush_set
+                                : refill_address[`LM32_IC_ADDR_SET_RNG];
+
+// Compute signal to indicate when we are on the last refill accesses
+generate 
+    if (bytes_per_line > 4)                            
+assign last_refill = refill_offset == {addr_offset_width{1'b1}};
+    else
+assign last_refill = `TRUE;
+endgenerate
+
+// Compute data and tag memory access enable
+assign enable = (stall_a == `FALSE);
+
+// Compute data and tag memory write enables
+generate
+    if (associativity == 1) 
+    begin : we_1     
+assign way_mem_we[0] = (refill_ready == `TRUE);
+    end
+    else
+    begin : we_2
+assign way_mem_we[0] = (refill_ready == `TRUE) && (refill_way_select[0] == `TRUE);
+assign way_mem_we[1] = (refill_ready == `TRUE) && (refill_way_select[1] == `TRUE);
+    end
+endgenerate                     
+
+// On the last refill cycle set the valid bit, for all other writes it should be cleared
+assign tmem_write_data[`LM32_IC_TAGS_VALID_RNG] = last_refill & !flushing;
+assign tmem_write_data[`LM32_IC_TAGS_TAG_RNG] = refill_address[`LM32_IC_ADDR_TAG_RNG];
+
+// Signals that indicate which state we are in
+assign flushing = |state[1:0];
+assign check = state[2];
+assign refill = state[3];
+
+assign miss = (~(|way_match)) && (read_enable_f == `TRUE) && (stall_f == `FALSE) && !(valid_d && branch_predict_taken_d);
+assign stall_request = (check == `FALSE);
+assign refill_request = (refill == `TRUE);
+                      
+/////////////////////////////////////////////////////
+// Sequential logic
+/////////////////////////////////////////////////////
+
+// Record way selected for replacement on a cache miss
+generate
+    if (associativity >= 2) // NOTE(review): the update below only references bits 0 and 1 — presumably only 2 ways are supported; confirm
+    begin : way_select      
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refill_way_select <= {{associativity-1{1'b0}}, 1'b1}; // After reset only bit 0 is set
+    else
+    begin        
+        if (miss == `TRUE)
+            refill_way_select <= {refill_way_select[0], refill_way_select[1]}; // Swap bits 0 and 1 on every miss
+    end
+end
+    end
+endgenerate
+
+// Record whether we are refilling
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refilling <= `FALSE;
+    else
+        refilling <= refill;
+end
+
+// Instruction cache control FSM
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        state <= `LM32_IC_STATE_FLUSH_INIT;
+        flush_set <= {`LM32_IC_TMEM_ADDR_WIDTH{1'b1}}; // Start at the highest set index; the flush states count down
+        refill_address <= {`LM32_PC_WIDTH{1'bx}};
+        restart_request <= `FALSE;
+    end
+    else 
+    begin
+        case (state)
+
+        // Flush the cache for the first time after reset
+        `LM32_IC_STATE_FLUSH_INIT:
+        begin            
+            if (flush_set == {`LM32_IC_TMEM_ADDR_WIDTH{1'b0}}) // Set 0 is the last set written by the flush
+                state <= `LM32_IC_STATE_CHECK;
+            flush_set <= flush_set - 1'b1;
+        end
+
+        // Flush the cache in response to a write to the ICC CSR
+        `LM32_IC_STATE_FLUSH:
+        begin            
+            if (flush_set == {`LM32_IC_TMEM_ADDR_WIDTH{1'b0}})
+`ifdef CFG_IROM_ENABLED
+             if (select_f)
+                state <= `LM32_IC_STATE_REFILL;
+             else
+`endif
+               state <= `LM32_IC_STATE_CHECK;
+          
+            flush_set <= flush_set - 1'b1; // Decrements every cycle in this state; wraps back to all ones after set 0
+        end
+        
+        // Check for cache misses
+        `LM32_IC_STATE_CHECK:
+        begin            
+            if (stall_a == `FALSE)
+                restart_request <= `FALSE;
+            if (iflush == `TRUE) // A flush request takes priority over a miss
+            begin
+                refill_address <= address_f;
+                state <= `LM32_IC_STATE_FLUSH;
+            end
+            else if (miss == `TRUE)
+            begin
+                refill_address <= address_f; // Latch the F-stage address that missed
+                state <= `LM32_IC_STATE_REFILL;
+            end
+        end
+
+        // Refill a cache line
+        `LM32_IC_STATE_REFILL:
+        begin            
+            if (refill_ready == `TRUE)
+            begin
+                if (last_refill == `TRUE) // Leave the refill state only when the last word is ready
+                begin
+                    restart_request <= `TRUE;
+                    state <= `LM32_IC_STATE_CHECK;
+                end
+            end
+        end
+
+        endcase        
+    end
+end
+
+generate 
+    if (bytes_per_line > 4)
+    begin
+// Refill offset: low-order part of the data memory write address during a refill (only generated when bytes_per_line > 4)
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        refill_offset <= {addr_offset_width{1'b0}};
+    else 
+    begin
+        case (state)
+        
+        // Check for cache misses
+        `LM32_IC_STATE_CHECK:
+        begin            
+            if (iflush == `TRUE)
+                refill_offset <= {addr_offset_width{1'b0}};
+            else if (miss == `TRUE)
+                refill_offset <= {addr_offset_width{1'b0}}; // Both branches clear the offset before leaving the check state
+        end
+
+        // Refill a cache line
+        `LM32_IC_STATE_REFILL:
+        begin            
+            if (refill_ready == `TRUE)
+                refill_offset <= refill_offset + 1'b1; // Advance once per word accepted from the refill interface
+        end
+
+        endcase        
+    end
+end
+    end
+endgenerate
+   
+endmodule
+
+`endif
+
diff --git a/verilog/lm32/lm32_include.v b/verilog/lm32/lm32_include.v
new file mode 100644 (file)
index 0000000..512c68b
--- /dev/null
@@ -0,0 +1,377 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_include.v
+// Title            : CPU global macros
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// Version          : 3.2
+//                  : No Change
+// Version          : 3.3
+//                  : Support for extended configuration register
+// =============================================================================
+
+`ifdef LM32_INCLUDE_V
+`else
+`define LM32_INCLUDE_V
+
+//
+// Common configuration options
+//
+
+`define CFG_EBA_RESET 32'h00860000
+`define CFG_DEBA_RESET 32'h10000000
+
+`define CFG_PL_MULTIPLY_ENABLED
+`define CFG_PL_BARREL_SHIFT_ENABLED
+`define CFG_SIGN_EXTEND_ENABLED
+`define CFG_MC_DIVIDE_ENABLED
+`define CFG_EBR_POSEDGE_REGISTER_FILE
+
+`define CFG_ICACHE_ENABLED
+`define CFG_ICACHE_ASSOCIATIVITY   1
+`define CFG_ICACHE_SETS            256
+`define CFG_ICACHE_BYTES_PER_LINE  16
+`define CFG_ICACHE_BASE_ADDRESS    32'h0
+`define CFG_ICACHE_LIMIT           32'h7fffffff
+
+`define CFG_DCACHE_ENABLED
+`define CFG_DCACHE_ASSOCIATIVITY   1
+`define CFG_DCACHE_SETS            256
+`define CFG_DCACHE_BYTES_PER_LINE  16
+`define CFG_DCACHE_BASE_ADDRESS    32'h0
+`define CFG_DCACHE_LIMIT           32'h7fffffff
+
+// Enable Debugging
+//`define CFG_JTAG_ENABLED
+//`define CFG_JTAG_UART_ENABLED
+//`define CFG_DEBUG_ENABLED
+//`define CFG_HW_DEBUG_ENABLED
+//`define CFG_ROM_DEBUG_ENABLED
+//`define CFG_BREAKPOINTS 32'h4
+//`define CFG_WATCHPOINTS 32'h4
+//`define CFG_EXTERNAL_BREAK_ENABLED
+//`define CFG_GDBSTUB_ENABLED
+
+//
+// End of common configuration options
+//
+
+`ifdef TRUE
+`else
+`define TRUE    1'b1
+`define FALSE   1'b0
+`define TRUE_N  1'b0
+`define FALSE_N 1'b1
+`endif
+
+// Wishbone configuration
+`define CFG_IWB_ENABLED
+`define CFG_DWB_ENABLED
+
+// Data-path width
+`define LM32_WORD_WIDTH                 32
+`define LM32_WORD_RNG                   (`LM32_WORD_WIDTH-1):0
+`define LM32_SHIFT_WIDTH                5
+`define LM32_SHIFT_RNG                  (`LM32_SHIFT_WIDTH-1):0
+`define LM32_BYTE_SELECT_WIDTH          4
+`define LM32_BYTE_SELECT_RNG            (`LM32_BYTE_SELECT_WIDTH-1):0
+
+// Register file size
+`define LM32_REGISTERS                  32
+`define LM32_REG_IDX_WIDTH              5
+`define LM32_REG_IDX_RNG                (`LM32_REG_IDX_WIDTH-1):0
+
+// Standard register numbers
+`define LM32_RA_REG                     `LM32_REG_IDX_WIDTH'd29
+`define LM32_EA_REG                     `LM32_REG_IDX_WIDTH'd30
+`define LM32_BA_REG                     `LM32_REG_IDX_WIDTH'd31
+
+// Range of Program Counter. Two LSBs are always 0. 
+`define LM32_PC_WIDTH                   (`LM32_WORD_WIDTH-2)
+`define LM32_PC_RNG                     (`LM32_PC_WIDTH+2-1):2
+
+// Range of an instruction
+`define LM32_INSTRUCTION_WIDTH          32
+`define LM32_INSTRUCTION_RNG            (`LM32_INSTRUCTION_WIDTH-1):0
+
+// Adder operation
+`define LM32_ADDER_OP_ADD               1'b0
+`define LM32_ADDER_OP_SUBTRACT          1'b1
+
+// Shift direction
+`define LM32_SHIFT_OP_RIGHT             1'b0
+`define LM32_SHIFT_OP_LEFT              1'b1
+
+// Bus errors
+`define CFG_BUS_ERRORS_ENABLED
+
+// Derive macro that indicates whether ROM debug is required (kept ahead of every test of CFG_ROM_DEBUG_ENABLED)
+`ifdef CFG_GDBSTUB_ENABLED
+`define CFG_ROM_DEBUG_ENABLED
+`endif
+
+// Derive macro that indicates whether we have single-stepping or not (ROM debug or hardware debug enables it)
+`ifdef CFG_ROM_DEBUG_ENABLED
+`define LM32_SINGLE_STEP_ENABLED
+`else
+`ifdef CFG_HW_DEBUG_ENABLED
+`define LM32_SINGLE_STEP_ENABLED
+`endif
+`endif
+
+// Derive macro that indicates whether JTAG interface is required
+`ifdef CFG_JTAG_UART_ENABLED
+`define LM32_JTAG_ENABLED
+`else
+`ifdef CFG_DEBUG_ENABLED
+`define LM32_JTAG_ENABLED
+`else
+`endif
+`endif
+
+// Derive macro that indicates whether we have a barrel-shifter or not
+`ifdef CFG_PL_BARREL_SHIFT_ENABLED
+`define LM32_BARREL_SHIFT_ENABLED
+`else // CFG_PL_BARREL_SHIFT_ENABLED
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+`define LM32_BARREL_SHIFT_ENABLED
+`else
+`define LM32_NO_BARREL_SHIFT
+`endif
+`endif // CFG_PL_BARREL_SHIFT_ENABLED
+
+// Derive macro that indicates whether we have a multiplier or not
+`ifdef CFG_PL_MULTIPLY_ENABLED
+`define LM32_MULTIPLY_ENABLED
+`else
+`ifdef CFG_MC_MULTIPLY_ENABLED
+`define LM32_MULTIPLY_ENABLED
+`endif
+`endif
+
+// Derive a macro that indicates whether or not the multi-cycle arithmetic unit is required
+`ifdef CFG_MC_DIVIDE_ENABLED
+`define LM32_MC_ARITHMETIC_ENABLED
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+`define LM32_MC_ARITHMETIC_ENABLED
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+`define LM32_MC_ARITHMETIC_ENABLED
+`endif
+
+// Derive macro that indicates if we are using an EBR register file
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+`define LM32_EBR_REGISTER_FILE
+`endif
+`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE
+`define LM32_EBR_REGISTER_FILE
+`endif
+
+// Revision number
+`define LM32_REVISION                   6'h02
+
+// Logical operations - Function encoded directly in instruction
+`define LM32_LOGIC_OP_RNG               3:0
+
+// Conditions for conditional branches
+`define LM32_CONDITION_WIDTH            3
+`define LM32_CONDITION_RNG              (`LM32_CONDITION_WIDTH-1):0
+`define LM32_CONDITION_E                3'b001
+`define LM32_CONDITION_G                3'b010
+`define LM32_CONDITION_GE               3'b011
+`define LM32_CONDITION_GEU              3'b100
+`define LM32_CONDITION_GU               3'b101
+`define LM32_CONDITION_NE               3'b111
+`define LM32_CONDITION_U1               3'b000
+`define LM32_CONDITION_U2               3'b110
+
+// Size of load or store instruction - Encoding corresponds to opcode
+`define LM32_SIZE_WIDTH                 2
+`define LM32_SIZE_RNG                   1:0
+`define LM32_SIZE_BYTE                  2'b00
+`define LM32_SIZE_HWORD                 2'b11
+`define LM32_SIZE_WORD                  2'b10
+`define LM32_ADDRESS_LSBS_WIDTH         2
+
+// Width and range of a CSR index
+`ifdef CFG_DEBUG_ENABLED
+`define LM32_CSR_WIDTH                  5
+`define LM32_CSR_RNG                    (`LM32_CSR_WIDTH-1):0
+`else
+`ifdef CFG_JTAG_ENABLED
+`define LM32_CSR_WIDTH                  4
+`define LM32_CSR_RNG                    (`LM32_CSR_WIDTH-1):0
+`else
+`define LM32_CSR_WIDTH                  3
+`define LM32_CSR_RNG                    (`LM32_CSR_WIDTH-1):0
+`endif
+`endif
+
+// CSR indices
+`define LM32_CSR_IE                     `LM32_CSR_WIDTH'h0
+`define LM32_CSR_IM                     `LM32_CSR_WIDTH'h1
+`define LM32_CSR_IP                     `LM32_CSR_WIDTH'h2
+`define LM32_CSR_ICC                    `LM32_CSR_WIDTH'h3
+`define LM32_CSR_DCC                    `LM32_CSR_WIDTH'h4
+`define LM32_CSR_CC                     `LM32_CSR_WIDTH'h5
+`define LM32_CSR_CFG                    `LM32_CSR_WIDTH'h6
+`define LM32_CSR_EBA                    `LM32_CSR_WIDTH'h7
+`ifdef CFG_DEBUG_ENABLED
+`define LM32_CSR_DC                     `LM32_CSR_WIDTH'h8
+`define LM32_CSR_DEBA                   `LM32_CSR_WIDTH'h9
+`endif
+`define LM32_CSR_CFG2                   `LM32_CSR_WIDTH'ha
+`ifdef CFG_JTAG_ENABLED
+`define LM32_CSR_JTX                    `LM32_CSR_WIDTH'he
+`define LM32_CSR_JRX                    `LM32_CSR_WIDTH'hf
+`endif
+`ifdef CFG_DEBUG_ENABLED
+`define LM32_CSR_BP0                    `LM32_CSR_WIDTH'h10
+`define LM32_CSR_BP1                    `LM32_CSR_WIDTH'h11
+`define LM32_CSR_BP2                    `LM32_CSR_WIDTH'h12
+`define LM32_CSR_BP3                    `LM32_CSR_WIDTH'h13
+`define LM32_CSR_WP0                    `LM32_CSR_WIDTH'h18
+`define LM32_CSR_WP1                    `LM32_CSR_WIDTH'h19
+`define LM32_CSR_WP2                    `LM32_CSR_WIDTH'h1a
+`define LM32_CSR_WP3                    `LM32_CSR_WIDTH'h1b
+`endif 
+
+// Values for WPC CSR
+`define LM32_WPC_C_RNG                  1:0
+`define LM32_WPC_C_DISABLED             2'b00
+`define LM32_WPC_C_READ                 2'b01
+`define LM32_WPC_C_WRITE                2'b10
+`define LM32_WPC_C_READ_WRITE           2'b11
+
+// Exception IDs
+`define LM32_EID_WIDTH                  3
+`define LM32_EID_RNG                    (`LM32_EID_WIDTH-1):0
+`define LM32_EID_RESET                  3'h0
+`define LM32_EID_BREAKPOINT             3'd1
+`define LM32_EID_INST_BUS_ERROR         3'h2
+`define LM32_EID_WATCHPOINT             3'd3
+`define LM32_EID_DATA_BUS_ERROR         3'h4
+`define LM32_EID_DIVIDE_BY_ZERO         3'h5
+`define LM32_EID_INTERRUPT              3'h6
+`define LM32_EID_SCALL                  3'h7
+
+// Pipeline result selection mux controls
+
+`define LM32_D_RESULT_SEL_0_RNG          0:0
+`define LM32_D_RESULT_SEL_0_REG_0        1'b0
+`define LM32_D_RESULT_SEL_0_NEXT_PC      1'b1
+
+`define LM32_D_RESULT_SEL_1_RNG          1:0
+`define LM32_D_RESULT_SEL_1_ZERO         2'b00
+`define LM32_D_RESULT_SEL_1_REG_1        2'b01
+`define LM32_D_RESULT_SEL_1_IMMEDIATE    2'b10
+
+`define LM32_USER_OPCODE_WIDTH           11
+`define LM32_USER_OPCODE_RNG             (`LM32_USER_OPCODE_WIDTH-1):0
+
+// Derive a macro to indicate if either of the caches are implemented
+`ifdef CFG_ICACHE_ENABLED
+`define LM32_CACHE_ENABLED
+`else
+`ifdef CFG_DCACHE_ENABLED
+`define LM32_CACHE_ENABLED
+`endif
+`endif
+
+/////////////////////////////////////////////////////
+// Interrupts
+/////////////////////////////////////////////////////
+
+// Always enable interrupts
+`define CFG_INTERRUPTS_ENABLED
+
+// Currently this is fixed to 32 and should not be changed
+`define CFG_INTERRUPTS                  32
+`define LM32_INTERRUPT_WIDTH            `CFG_INTERRUPTS
+`define LM32_INTERRUPT_RNG              (`LM32_INTERRUPT_WIDTH-1):0
+
+/////////////////////////////////////////////////////
+// General
+/////////////////////////////////////////////////////
+
+// Sub-word range types
+`define LM32_BYTE_WIDTH                 8
+`define LM32_BYTE_RNG                   7:0
+`define LM32_HWORD_WIDTH                16
+`define LM32_HWORD_RNG                  15:0
+
+// Word sub-byte indices
+`define LM32_BYTE_0_RNG                  7:0
+`define LM32_BYTE_1_RNG                  15:8
+`define LM32_BYTE_2_RNG                  23:16
+`define LM32_BYTE_3_RNG                  31:24
+
+// Word sub-halfword indices
+`define LM32_HWORD_0_RNG                 15:0
+`define LM32_HWORD_1_RNG                 31:16
+
+// Use a synchronous reset
+`define CFG_RESET_SENSITIVITY
+
+// Wishbone defines 
+// Refer to Wishbone System-on-Chip Interconnection Architecture
+// These should probably be moved to a Wishbone common file
+
+// Wishbone cycle types
+`define LM32_CTYPE_WIDTH                3
+`define LM32_CTYPE_RNG                  (`LM32_CTYPE_WIDTH-1):0
+`define LM32_CTYPE_CLASSIC              3'b000
+`define LM32_CTYPE_CONSTANT             3'b001
+`define LM32_CTYPE_INCREMENTING         3'b010
+`define LM32_CTYPE_END                  3'b111
+
+// Wishbone burst types
+`define LM32_BTYPE_WIDTH                2
+`define LM32_BTYPE_RNG                  (`LM32_BTYPE_WIDTH-1):0
+`define LM32_BTYPE_LINEAR               2'b00
+`define LM32_BTYPE_4_BEAT               2'b01
+`define LM32_BTYPE_8_BEAT               2'b10
+`define LM32_BTYPE_16_BEAT              2'b11
+
+`endif
diff --git a/verilog/lm32/lm32_instruction_unit.v b/verilog/lm32/lm32_instruction_unit.v
new file mode 100644 (file)
index 0000000..10a2d9c
--- /dev/null
@@ -0,0 +1,889 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project      : LatticeMico32
+// File         : lm32_instruction_unit.v
+// Title        : Instruction unit
+// Dependencies : lm32_include.v
+// Version      : 6.1.17
+//              : Initial Release
+// Version      : 7.0SP2, 3.0
+//              : No Change
+// Version      : 3.1
+//              : Support for static branch prediction is added. Fetching of
+//              : instructions can also be altered by branches predicted in D
+//              : stage of pipeline, and mispredicted branches in the X and M 
+//              : stages of the pipeline.
+// Version      : 3.2
+//              : EBRs use SYNC resets instead of ASYNC resets.
+// Version      : 3.3
+//              : Support for a non-cacheable Instruction Memory that has a 
+//              : single-cycle access latency. This memory can be accessed by
+//              : data port of LM32 (so that debugger has access to it).
+// Version      : 3.4
+//              : No change
+// Version      : 3.5
+//              : Bug fix: Inline memory is correctly generated if it is not a
+//              : power-of-two.
+//              : Bug fix: Fixed a bug that caused LM32 (configured without
+//              : instruction cache) to lock up in to an infinite loop due to a 
+//              : instruction bus error when EBA was set to instruction inline
+//              : memory.
+// Version      : 3.8 
+//              : Feature: Support for dynamically switching EBA to DEBA via a 
+//              : GPIO.
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_instruction_unit (
+    // ----- Inputs -------
+    clk_i,
+    rst_i,
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+    at_debug,                           // When high at reset, pc_f is derived from CFG_DEBA_RESET instead of CFG_EBA_RESET
+ `endif
+`endif
+    // From pipeline (stall/valid/kill controls and branch redirection)
+    stall_a,
+    stall_f,
+    stall_d,
+    stall_x,
+    stall_m,
+    valid_f,
+    valid_d,
+    kill_f,
+    branch_predict_taken_d,
+    branch_predict_address_d,
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+    branch_taken_x,
+    branch_target_x,
+`endif
+    exception_m,                        // When set, the mispredict path in the pc_a mux is bypassed
+    branch_taken_m,
+    branch_mispredict_taken_m,
+    branch_target_m,
+`ifdef CFG_ICACHE_ENABLED
+    iflush,
+`endif
+`ifdef CFG_DCACHE_ENABLED
+    dcache_restart_request,
+    dcache_refill_request,
+    dcache_refilling,
+`endif        
+`ifdef CFG_IROM_ENABLED
+    irom_store_data_m,
+    irom_address_xm,
+    irom_we_xm,
+`endif
+`ifdef CFG_IWB_ENABLED
+    // From Wishbone
+    i_dat_i,
+    i_ack_i,
+    i_err_i,
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    jtag_read_enable,
+    jtag_write_enable,
+    jtag_write_data,
+    jtag_address,
+`endif
+    // ----- Outputs -------
+    // To pipeline (per-stage PCs and fetched instruction)
+    pc_f,
+    pc_d,
+    pc_x,
+    pc_m,
+    pc_w,
+`ifdef CFG_ICACHE_ENABLED
+    icache_stall_request,
+    icache_restart_request,
+    icache_refill_request,
+    icache_refilling,
+`endif
+`ifdef CFG_IROM_ENABLED
+    irom_data_m,
+`endif
+`ifdef CFG_IWB_ENABLED
+    // To Wishbone
+    i_dat_o,
+    i_adr_o,
+    i_cyc_o,
+    i_sel_o,
+    i_stb_o,
+    i_we_o,
+    i_cti_o,
+    i_lock_o,
+    i_bte_o,
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    jtag_read_data,
+    jtag_access_complete,
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+    bus_error_d,
+`endif
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+    instruction_f,
+`endif    
+    instruction_d
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter associativity = 1;                            // Associativity of the cache (Number of ways)
+parameter sets = 512;                                   // Number of sets
+parameter bytes_per_line = 16;                          // Number of bytes per cache line
+parameter base_address = 0;                             // Base address of cachable memory
+parameter limit = 0;                                    // Limit (highest address) of cachable memory
+
+// For bytes_per_line == 4 the offset field is not really used; its width is forced to 1 so the part-select range isn't reversed
+localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
+localparam addr_offset_lsb = 2;                         // Word-offset field of i_adr_o starts at bit 2 (bits 1:0 are always written as 2'b00 for fetches)
+localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);   // Top bit of the word-offset field within i_adr_o
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                            // Clock
+input rst_i;                                            // Reset
+
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+   input at_debug;                                      // GPIO input that maps EBA to DEBA
+ `endif
+`endif
+
+input stall_a;                                          // Stall A stage instruction
+input stall_f;                                          // Stall F stage instruction
+input stall_d;                                          // Stall D stage instruction
+input stall_x;                                          // Stall X stage instruction
+input stall_m;                                          // Stall M stage instruction
+input valid_f;                                          // Instruction in F stage is valid
+input valid_d;                                          // Instruction in D stage is valid
+input kill_f;                                           // Kill instruction in F stage
+
+input branch_predict_taken_d;                           // Branch is predicted taken in D stage
+input [`LM32_PC_RNG] branch_predict_address_d;          // Branch target address
+   
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+input branch_taken_x;                                   // Branch instruction in X stage is taken
+input [`LM32_PC_RNG] branch_target_x;                   // Target PC of X stage branch instruction
+`endif
+input exception_m;                                      // When set with branch_taken_m, pc_a takes branch_target_m even if a mispredict is flagged
+input branch_taken_m;                                   // Branch instruction in M stage is taken
+input branch_mispredict_taken_m;                        // Branch instruction in M stage is mispredicted as taken
+input [`LM32_PC_RNG] branch_target_m;                   // Target PC of M stage branch instruction
+
+`ifdef CFG_ICACHE_ENABLED
+input iflush;                                           // Flush instruction cache
+`endif
+`ifdef CFG_DCACHE_ENABLED
+input dcache_restart_request;                           // Restart instruction that caused a data cache miss
+input dcache_refill_request;                            // Request to refill data cache
+input dcache_refilling;                                 // While set, the I-cache restart address is not captured (presumably: D-cache refill in progress)
+`endif        
+
+`ifdef CFG_IROM_ENABLED
+input [`LM32_WORD_RNG] irom_store_data_m;               // Data from load-store unit
+input [`LM32_WORD_RNG] irom_address_xm;                 // Address from load-store unit
+input irom_we_xm;                                       // Indicates if memory operation is load or store
+`endif
+
+`ifdef CFG_IWB_ENABLED
+input [`LM32_WORD_RNG] i_dat_i;                         // Instruction Wishbone interface read data
+input i_ack_i;                                          // Instruction Wishbone interface acknowledgement
+input i_err_i;                                          // Instruction Wishbone interface error
+`endif
+
+`ifdef CFG_HW_DEBUG_ENABLED
+input jtag_read_enable;                                 // JTAG read memory request
+input jtag_write_enable;                                // JTAG write memory request
+input [`LM32_BYTE_RNG] jtag_write_data;                 // JTAG write data
+input [`LM32_WORD_RNG] jtag_address;                    // JTAG read/write address
+`endif
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+        
+output [`LM32_PC_RNG] pc_f;                             // F stage PC
+reg    [`LM32_PC_RNG] pc_f;
+output [`LM32_PC_RNG] pc_d;                             // D stage PC
+reg    [`LM32_PC_RNG] pc_d;
+output [`LM32_PC_RNG] pc_x;                             // X stage PC
+reg    [`LM32_PC_RNG] pc_x;
+output [`LM32_PC_RNG] pc_m;                             // M stage PC
+reg    [`LM32_PC_RNG] pc_m;
+output [`LM32_PC_RNG] pc_w;                             // W stage PC
+reg    [`LM32_PC_RNG] pc_w;
+
+`ifdef CFG_ICACHE_ENABLED
+output icache_stall_request;                            // Instruction cache stall request
+wire   icache_stall_request;
+output icache_restart_request;                          // Request to restart instruction that caused instruction cache miss
+wire   icache_restart_request;
+output icache_refill_request;                           // Instruction cache refill request
+wire   icache_refill_request;
+output icache_refilling;                                // Indicates the icache is refilling
+wire   icache_refilling;
+`endif
+
+`ifdef CFG_IROM_ENABLED
+output [`LM32_WORD_RNG] irom_data_m;                    // Data to load-store unit on load
+wire   [`LM32_WORD_RNG] irom_data_m;                      
+`endif   
+
+`ifdef CFG_IWB_ENABLED
+output [`LM32_WORD_RNG] i_dat_o;                        // Instruction Wishbone interface write data
+`ifdef CFG_HW_DEBUG_ENABLED
+reg    [`LM32_WORD_RNG] i_dat_o;                        // Registered: carries JTAG write data
+`else
+wire   [`LM32_WORD_RNG] i_dat_o;                        // Constant zero when HW debug is disabled
+`endif
+output [`LM32_WORD_RNG] i_adr_o;                        // Instruction Wishbone interface address
+reg    [`LM32_WORD_RNG] i_adr_o;
+output i_cyc_o;                                         // Instruction Wishbone interface cycle
+reg    i_cyc_o; 
+output [`LM32_BYTE_SELECT_RNG] i_sel_o;                 // Instruction Wishbone interface byte select
+`ifdef CFG_HW_DEBUG_ENABLED
+reg    [`LM32_BYTE_SELECT_RNG] i_sel_o;                 // Registered: single-byte selects are used for JTAG accesses
+`else
+wire   [`LM32_BYTE_SELECT_RNG] i_sel_o;                 // Constant 4'b1111 when HW debug is disabled
+`endif
+output i_stb_o;                                         // Instruction Wishbone interface strobe
+reg    i_stb_o;
+output i_we_o;                                          // Instruction Wishbone interface write enable
+`ifdef CFG_HW_DEBUG_ENABLED
+reg    i_we_o;                                          // Registered: follows jtag_write_enable for JTAG accesses
+`else
+wire   i_we_o;                                          // Constant FALSE when HW debug is disabled
+`endif
+output [`LM32_CTYPE_RNG] i_cti_o;                       // Instruction Wishbone interface cycle type 
+reg    [`LM32_CTYPE_RNG] i_cti_o;
+output i_lock_o;                                        // Instruction Wishbone interface lock bus
+reg    i_lock_o;                                        // Only ever driven FALSE in this module (the TRUE assignment is commented out)
+output [`LM32_BTYPE_RNG] i_bte_o;                       // Instruction Wishbone interface burst type 
+wire   [`LM32_BTYPE_RNG] i_bte_o;                       // Tied to the linear burst type
+`endif
+
+`ifdef CFG_HW_DEBUG_ENABLED
+output [`LM32_BYTE_RNG] jtag_read_data;                 // Data read for JTAG interface
+reg    [`LM32_BYTE_RNG] jtag_read_data;
+output jtag_access_complete;                            // Requested memory access by JTAG interface is complete
+wire   jtag_access_complete;
+`endif
+
+`ifdef CFG_BUS_ERRORS_ENABLED
+output bus_error_d;                                     // Indicates a bus error occurred while fetching the instruction
+reg    bus_error_d;
+`endif
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+output [`LM32_INSTRUCTION_RNG] instruction_f;           // F stage instruction (only to have register indices extracted from)
+wire   [`LM32_INSTRUCTION_RNG] instruction_f;
+`endif
+output [`LM32_INSTRUCTION_RNG] instruction_d;           // D stage instruction to be decoded
+reg    [`LM32_INSTRUCTION_RNG] instruction_d;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+reg [`LM32_PC_RNG] pc_a;                                // A stage PC (combinational; declared reg because it is assigned in an always @(*) block)
+
+`ifdef LM32_CACHE_ENABLED
+reg [`LM32_PC_RNG] restart_address;                     // Address to restart from after a cache miss
+`endif
+
+`ifdef CFG_ICACHE_ENABLED
+wire icache_read_enable_f;                              // Indicates if instruction cache miss is valid
+wire [`LM32_PC_RNG] icache_refill_address;              // Address that caused cache miss
+reg icache_refill_ready;                                // Indicates when next word of refill data is ready to be written to cache
+reg [`LM32_INSTRUCTION_RNG] icache_refill_data;         // Next word of refill data, fetched from Wishbone
+wire [`LM32_INSTRUCTION_RNG] icache_data_f;             // Instruction fetched from instruction cache
+wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
+wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
+wire last_word;                                         // Indicates if this is the last word in the cache line
+wire [`LM32_PC_RNG] first_address;                      // First cache refill address
+`else
+`ifdef CFG_IWB_ENABLED
+reg [`LM32_INSTRUCTION_RNG] wb_data_f;                  // Instruction fetched from Wishbone
+`endif
+`endif
+`ifdef CFG_IROM_ENABLED
+wire irom_select_a;                                     // Indicates if A stage PC maps to a ROM address
+reg irom_select_f;                                      // Indicates if F stage PC maps to a ROM address
+wire [`LM32_INSTRUCTION_RNG] irom_data_f;               // Instruction fetched from ROM
+`endif
+`ifdef CFG_EBR_POSEDGE_REGISTER_FILE
+`else
+wire [`LM32_INSTRUCTION_RNG] instruction_f;             // F stage instruction
+`endif
+`ifdef CFG_BUS_ERRORS_ENABLED
+reg bus_error_f;                                        // Indicates if a bus error occurred while fetching the instruction in the F stage
+`endif
+
+`ifdef CFG_HW_DEBUG_ENABLED
+reg jtag_access;                                        // Indicates if a JTAG WB access is in progress
+`endif
+
+`ifdef CFG_ALTERNATE_EBA
+reg alternate_eba_taken;                                // NOTE(review): declared but never referenced anywhere in this module
+`endif
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+// Instruction ROM: true dual-port RAM. Port A is the read-only instruction fetch port, port B is the load-store unit port.
+`ifdef CFG_IROM_ENABLED  
+   pmi_ram_dp_true 
+     #(
+       // ----- Parameters -------
+       .pmi_family             (`LATTICE_FAMILY),
+        
+       // Previous sizing, kept for reference (depth rounded via clogb2):
+       //.pmi_addr_depth_a       (1 << (clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_addr_width_a       ((clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_data_width_a       (`LM32_WORD_WIDTH),
+       //.pmi_addr_depth_b       (1 << (clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_addr_width_b       ((clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)),
+        
+       .pmi_addr_depth_a       (`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1),
+       .pmi_addr_width_a       (clogb2_v1(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)),
+       .pmi_data_width_a       (`LM32_WORD_WIDTH),
+       .pmi_addr_depth_b       (`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1),
+       .pmi_addr_width_b       (clogb2_v1(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)),
+       .pmi_data_width_b       (`LM32_WORD_WIDTH),
+        
+       .pmi_regmode_a          ("noreg"),
+       .pmi_regmode_b          ("noreg"),
+       .pmi_gsr                ("enable"),
+       .pmi_resetmode          ("sync"),
+       .pmi_init_file          (`CFG_IROM_INIT_FILE),
+       .pmi_init_file_format   (`CFG_IROM_INIT_FILE_FORMAT),
+       .module_type            ("pmi_ram_dp_true")
+       ) 
+       ram (
+           // ----- Inputs -------
+           .ClockA                 (clk_i),
+           .ClockB                 (clk_i),
+           .ResetA                 (rst_i),
+           .ResetB                 (rst_i),
+           .DataInA                ({32{1'b0}}),             // Port A never writes (WrA is tied FALSE)
+           .DataInB                (irom_store_data_m),
+           .AddressA               (pc_a[(clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)+2-1:2]),   // NOTE(review): slice width uses clogb2()-1 while pmi_addr_width_a uses clogb2_v1(); confirm they agree
+           .AddressB               (irom_address_xm[(clogb2(`CFG_IROM_LIMIT/4-`CFG_IROM_BASE_ADDRESS/4+1)-1)+2-1:2]),
+           .ClockEnA               (!stall_a),               // Fetch port advances with the A stage
+           .ClockEnB               (!stall_x || !stall_m),   // Data port is enabled when either X or M is not stalled
+           .WrA                    (`FALSE),
+           .WrB                    (irom_we_xm), 
+           // ----- Outputs -------
+           .QA                     (irom_data_f),
+           .QB                     (irom_data_m)
+           );
+`endif    
+`ifdef CFG_ICACHE_ENABLED
+// Instruction cache: addressed with the A- and F-stage PCs, refilled word-by-word from the Wishbone logic in this module
+lm32_icache #(
+    .associativity          (associativity),
+    .sets                   (sets),
+    .bytes_per_line         (bytes_per_line),
+    .base_address           (base_address),
+    .limit                  (limit)
+    ) icache ( 
+    // ----- Inputs -----
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),      
+    .stall_a                (stall_a),
+    .stall_f                (stall_f),
+    .branch_predict_taken_d (branch_predict_taken_d),
+    .valid_d                (valid_d),
+    .address_a              (pc_a),
+    .address_f              (pc_f),
+    .read_enable_f          (icache_read_enable_f),
+    .refill_ready           (icache_refill_ready),
+    .refill_data            (icache_refill_data),
+    .iflush                 (iflush),
+    // ----- Outputs -----
+    .stall_request          (icache_stall_request),
+    .restart_request        (icache_restart_request),
+    .refill_request         (icache_refill_request),
+    .refill_address         (icache_refill_address),
+    .refilling              (icache_refilling),
+    .inst                   (icache_data_f)
+    );
+`endif
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+`ifdef CFG_ICACHE_ENABLED
+// Cache read enable for the F stage: F is valid and not killed, no D-cache restart is requested, and the fetch is not served by IROM
+assign icache_read_enable_f =    (valid_f == `TRUE)
+                              && (kill_f == `FALSE)
+`ifdef CFG_DCACHE_ENABLED
+                              && (dcache_restart_request == `FALSE)
+`endif                         
+`ifdef CFG_IROM_ENABLED 
+                              && (irom_select_f == `FALSE)
+`endif       
+                              ;
+`endif
+
+// Compute address of next instruction to fetch (priority mux, highest priority first)
+always @(*)
+begin
+    // The request from the latest pipeline stage must take priority
+`ifdef CFG_DCACHE_ENABLED
+    if (dcache_restart_request == `TRUE)
+        pc_a = restart_address;                         // Replay from the saved address after a D-cache miss
+    else 
+`endif    
+      if (branch_taken_m == `TRUE)
+       if ((branch_mispredict_taken_m == `TRUE) && (exception_m == `FALSE))
+         pc_a = pc_x;                                   // Mispredicted as taken and no exception: refetch from the X-stage PC
+       else
+          pc_a = branch_target_m;                       // M-stage target (also used whenever exception_m is set)
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+      else if (branch_taken_x == `TRUE)
+        pc_a = branch_target_x;                         // Branch taken in X
+`endif
+      else
+       if ( (valid_d == `TRUE) && (branch_predict_taken_d == `TRUE) )
+         pc_a = branch_predict_address_d;               // Valid D-stage branch predicted taken
+       else
+`ifdef CFG_ICACHE_ENABLED
+          if (icache_restart_request == `TRUE)
+            pc_a = restart_address;                     // Replay from the saved address after an I-cache miss
+         else 
+`endif        
+            pc_a = pc_f + 1'b1;                         // Default: next sequential word (PC is a word address)
+end
+
+// Select where instruction should be fetched from: IROM window is [CFG_IROM_BASE_ADDRESS, CFG_IROM_LIMIT] inclusive, compared on the byte address {pc_a, 2'b00}
+`ifdef CFG_IROM_ENABLED
+assign irom_select_a = ({pc_a, 2'b00} >= `CFG_IROM_BASE_ADDRESS) && ({pc_a, 2'b00} <= `CFG_IROM_LIMIT);
+`endif
+                     
+// Select instruction from selected source (IROM wins over cache/Wishbone when the F-stage PC was in the IROM window)
+`ifdef CFG_ICACHE_ENABLED
+`ifdef CFG_IROM_ENABLED
+assign instruction_f = irom_select_f == `TRUE ? irom_data_f : icache_data_f;
+`else
+assign instruction_f = icache_data_f;
+`endif
+`else
+`ifdef CFG_IROM_ENABLED
+`ifdef CFG_IWB_ENABLED
+assign instruction_f = irom_select_f == `TRUE ? irom_data_f : wb_data_f;
+`else
+assign instruction_f = irom_data_f;
+`endif
+`else
+assign instruction_f = wb_data_f;
+`endif
+`endif
+
+// Unused/constant Wishbone signals (write data, write enable and byte select are only constant when HW debug is disabled)
+`ifdef CFG_IWB_ENABLED
+`ifdef CFG_HW_DEBUG_ENABLED
+`else
+assign i_dat_o = 32'd0;
+assign i_we_o = `FALSE;
+assign i_sel_o = 4'b1111;
+`endif
+assign i_bte_o = `LM32_BTYPE_LINEAR;
+`endif
+
+`ifdef CFG_ICACHE_ENABLED
+// Determine parameters for next cache refill Wishbone access, specialised per cache line size
+generate
+    case (bytes_per_line)
+    4:                                                  // 4-byte line: single access, every word is the last word
+    begin
+assign first_cycle_type = `LM32_CTYPE_END;
+assign next_cycle_type = `LM32_CTYPE_END;
+assign last_word = `TRUE;
+assign first_address = icache_refill_address;
+    end
+    8:                                                  // 8-byte line: incrementing burst, second access is the end cycle
+    begin
+assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
+assign next_cycle_type = `LM32_CTYPE_END;
+assign last_word = i_adr_o[addr_offset_msb:addr_offset_lsb] == 1'b1;
+assign first_address = {icache_refill_address[`LM32_PC_WIDTH+2-1:addr_offset_msb+1], {addr_offset_width{1'b0}}};
+    end
+    16:                                                 // 16-byte line: incrementing burst, end cycle once the top offset bit is set
+    begin
+assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
+assign next_cycle_type = i_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
+assign last_word = i_adr_o[addr_offset_msb:addr_offset_lsb] == 2'b11;
+assign first_address = {icache_refill_address[`LM32_PC_WIDTH+2-1:addr_offset_msb+1], {addr_offset_width{1'b0}}};
+    end
+    endcase                                             // NOTE(review): no default arm; any other bytes_per_line leaves these four nets undriven
+endgenerate
+`endif
+                     
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// PC pipeline registers. pc_f resets one word below the reset vector, presumably so the first pc_a = pc_f + 1 fetches the vector itself
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+        if (at_debug == `TRUE)
+            pc_f <= (`CFG_DEBA_RESET-4)/4;              // Byte address converted to a word address
+        else
+            pc_f <= (`CFG_EBA_RESET-4)/4;
+ `else
+        pc_f <= (`CFG_EBA_RESET-4)/4;
+ `endif
+`else
+        pc_f <= (`CFG_EBA_RESET-4)/4;
+`endif
+        pc_d <= {`LM32_PC_WIDTH{1'b0}};
+        pc_x <= {`LM32_PC_WIDTH{1'b0}};
+        pc_m <= {`LM32_PC_WIDTH{1'b0}};
+        pc_w <= {`LM32_PC_WIDTH{1'b0}};
+    end
+    else
+    begin
+        if (stall_f == `FALSE)                          // Each stage PC advances only when that stage is not stalled
+            pc_f <= pc_a;
+        if (stall_d == `FALSE)
+            pc_d <= pc_f;
+        if (stall_x == `FALSE)
+            pc_x <= pc_d;
+        if (stall_m == `FALSE)
+            pc_m <= pc_x;
+        pc_w <= pc_m;                                   // W-stage PC updates every cycle (there is no stall term for W)
+    end
+end
+
+`ifdef LM32_CACHE_ENABLED
+// Address to restart from after a cache miss has been handled (captured when a refill is requested, consumed by the pc_a mux)
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        restart_address <= {`LM32_PC_WIDTH{1'b0}};
+    else
+    begin
+`ifdef CFG_DCACHE_ENABLED
+`ifdef CFG_ICACHE_ENABLED        
+            // D-cache restart address must take priority, otherwise instructions will be lost
+            if (dcache_refill_request == `TRUE)
+                restart_address <= pc_w;                // D-cache miss: restart from the W-stage PC
+            else if ((icache_refill_request == `TRUE) && (!dcache_refilling) && (!dcache_restart_request))
+                restart_address <= icache_refill_address;   // I-cache miss, only while no D-cache refill/restart is active
+`else
+            if (dcache_refill_request == `TRUE)
+                restart_address <= pc_w;                // D-cache miss: restart from the W-stage PC
+`endif
+`else
+`ifdef CFG_ICACHE_ENABLED        
+            if (icache_refill_request == `TRUE)
+                restart_address <= icache_refill_address;   // I-cache miss: restart from the address that missed
+`endif
+`endif
+    end
+end
+`endif
+
+// Record where instruction was fetched from: the A-stage IROM select is registered into F whenever F is not stalled
+`ifdef CFG_IROM_ENABLED
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        irom_select_f <= `FALSE;
+    else
+    begin
+        if (stall_f == `FALSE)
+            irom_select_f <= irom_select_a;
+    end
+end
+`endif
+
+`ifdef CFG_HW_DEBUG_ENABLED
+assign jtag_access_complete = (i_cyc_o == `TRUE) && ((i_ack_i == `TRUE) || (i_err_i == `TRUE)) && (jtag_access == `TRUE);
+always @(*)
+begin
+    case (jtag_address[1:0])                            // Byte offset 0 maps to the most-significant byte lane
+    2'b00: jtag_read_data = i_dat_i[`LM32_BYTE_3_RNG];  // bits 31:24
+    2'b01: jtag_read_data = i_dat_i[`LM32_BYTE_2_RNG];  // bits 23:16
+    2'b10: jtag_read_data = i_dat_i[`LM32_BYTE_1_RNG];  // bits 15:8
+    2'b11: jtag_read_data = i_dat_i[`LM32_BYTE_0_RNG];  // bits 7:0
+    endcase 
+end
+`endif
+
+`ifdef CFG_IWB_ENABLED
+// Instruction Wishbone interface
+`ifdef CFG_ICACHE_ENABLED                
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        i_cyc_o <= `FALSE;
+        i_stb_o <= `FALSE;
+        i_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
+        i_cti_o <= `LM32_CTYPE_END;
+        i_lock_o <= `FALSE;
+        icache_refill_data <= {`LM32_INSTRUCTION_WIDTH{1'b0}};
+        icache_refill_ready <= `FALSE;
+`ifdef CFG_BUS_ERRORS_ENABLED
+        bus_error_f <= `FALSE;
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+        i_we_o <= `FALSE;
+        i_sel_o <= 4'b1111;
+        jtag_access <= `FALSE;
+`endif
+    end
+    else
+    begin   
+        icache_refill_ready <= `FALSE;                  // Default; overridden below so the flag pulses for one cycle per received word
+        // Is a cycle in progress?
+        if (i_cyc_o == `TRUE)
+        begin
+            // Has cycle completed? (an error response terminates the access just like an acknowledge)
+            if ((i_ack_i == `TRUE) || (i_err_i == `TRUE))
+            begin
+`ifdef CFG_HW_DEBUG_ENABLED
+                if (jtag_access == `TRUE)               // JTAG single access finished: release the bus
+                begin
+                    i_cyc_o <= `FALSE;
+                    i_stb_o <= `FALSE;       
+                    i_we_o <= `FALSE;  
+                    jtag_access <= `FALSE;    
+                end
+                else
+`endif
+                begin
+                    if (last_word == `TRUE)
+                    begin
+                        // Cache line fill complete 
+                        i_cyc_o <= `FALSE;
+                        i_stb_o <= `FALSE;
+                        i_lock_o <= `FALSE;
+                    end
+                    // Fetch next word in cache line (also executed on the last word; only the offset field is incremented)
+                    i_adr_o[addr_offset_msb:addr_offset_lsb] <= i_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
+                    i_cti_o <= next_cycle_type;
+                    // Write fetched data into instruction cache
+                    icache_refill_ready <= `TRUE;
+                    icache_refill_data <= i_dat_i;
+                end
+            end
+`ifdef CFG_BUS_ERRORS_ENABLED
+            if (i_err_i == `TRUE)
+            begin
+                bus_error_f <= `TRUE;
+                $display ("Instruction bus error. Address: %x", i_adr_o);
+            end
+`endif
+        end
+        else
+        begin
+            if ((icache_refill_request == `TRUE) && (icache_refill_ready == `FALSE))
+            begin
+                // Read first word of cache line (not started in a cycle where icache_refill_ready is still set from the previous word)
+`ifdef CFG_HW_DEBUG_ENABLED     
+                i_sel_o <= 4'b1111;
+`endif
+                i_adr_o <= {first_address, 2'b00};
+                i_cyc_o <= `TRUE;
+                i_stb_o <= `TRUE;                
+                i_cti_o <= first_cycle_type;
+                //i_lock_o <= `TRUE;                    (disabled: i_lock_o is never asserted during a refill)
+`ifdef CFG_BUS_ERRORS_ENABLED
+                bus_error_f <= `FALSE;
+`endif
+            end
+`ifdef CFG_HW_DEBUG_ENABLED
+            else
+            begin
+                if ((jtag_read_enable == `TRUE) || (jtag_write_enable == `TRUE))   // JTAG access is only started when no refill is starting this cycle
+                begin
+                    case (jtag_address[1:0])            // One-hot byte select; offset 0 selects the most-significant lane
+                    2'b00: i_sel_o <= 4'b1000;
+                    2'b01: i_sel_o <= 4'b0100;
+                    2'b10: i_sel_o <= 4'b0010;
+                    2'b11: i_sel_o <= 4'b0001;
+                    endcase
+                    i_adr_o <= jtag_address;
+                    i_dat_o <= {4{jtag_write_data}};    // Write byte replicated onto all four lanes; i_sel_o picks the one used
+                    i_cyc_o <= `TRUE;
+                    i_stb_o <= `TRUE;
+                    i_we_o <= jtag_write_enable;
+                    i_cti_o <= `LM32_CTYPE_END;
+                    jtag_access <= `TRUE;
+                end
+            end 
+`endif                    
+`ifdef CFG_BUS_ERRORS_ENABLED
+            // Clear bus error when exception taken, otherwise they would be 
+            // continually generated if exception handler is cached
+`ifdef CFG_FAST_UNCONDITIONAL_BRANCH    
+            if (branch_taken_x == `TRUE)
+                bus_error_f <= `FALSE;
+`endif
+            if (branch_taken_m == `TRUE)
+                bus_error_f <= `FALSE;
+`endif
+        end
+    end
+end
+`else
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        i_cyc_o <= `FALSE;
+        i_stb_o <= `FALSE;
+        i_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
+        i_cti_o <= `LM32_CTYPE_END;
+        i_lock_o <= `FALSE;
+        wb_data_f <= {`LM32_INSTRUCTION_WIDTH{1'b0}};
+`ifdef CFG_BUS_ERRORS_ENABLED
+        bus_error_f <= `FALSE;
+`endif
+    end
+    else
+    begin   
+        // Is a cycle in progress?
+        if (i_cyc_o == `TRUE)
+        begin
+            // Has cycle completed? (an error response terminates the access just like an acknowledge)
+            if((i_ack_i == `TRUE) || (i_err_i == `TRUE))
+            begin
+                // Cycle complete
+                i_cyc_o <= `FALSE;
+                i_stb_o <= `FALSE;
+                // Register fetched instruction
+                wb_data_f <= i_dat_i;
+            end
+`ifdef CFG_BUS_ERRORS_ENABLED
+            if (i_err_i == `TRUE)
+            begin
+                bus_error_f <= `TRUE;
+                $display ("Instruction bus error. Address: %x", i_adr_o);
+            end
+`endif
+        end
+        else
+        begin
+            // Start a fetch from an external address when the A stage is not stalled (and the PC is outside IROM, if present)
+            if (   (stall_a == `FALSE) 
+`ifdef CFG_IROM_ENABLED 
+                && (irom_select_a == `FALSE)
+`endif       
+               )
+            begin
+                // Fetch instruction
+`ifdef CFG_HW_DEBUG_ENABLED     
+                i_sel_o <= 4'b1111;                     // NOTE(review): in this no-cache branch i_sel_o has no reset value and i_we_o, i_dat_o, jtag_access are never assigned; confirm HW debug without I-cache is supported
+`endif
+                i_adr_o <= {pc_a, 2'b00};
+                i_cyc_o <= `TRUE;
+                i_stb_o <= `TRUE;
+`ifdef CFG_BUS_ERRORS_ENABLED
+                bus_error_f <= `FALSE;
+`endif
+            end
+           else
+           begin
+               if (   (stall_a == `FALSE)               // With IROM: fetch served by IROM. Without IROM this repeats the outer condition and can never be true here
+`ifdef CFG_IROM_ENABLED 
+                   && (irom_select_a == `TRUE)
+`endif       
+                  )
+               begin
+`ifdef CFG_BUS_ERRORS_ENABLED
+                   bus_error_f <= `FALSE;               // Clear the bus error flag when the fetch does not go to Wishbone
+`endif
+               end
+           end
+        end
+    end
+end
+`endif
+`endif
+
+// Instruction register: F-stage instruction (and bus error flag, if enabled) is captured into D whenever D is not stalled
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        instruction_d <= {`LM32_INSTRUCTION_WIDTH{1'b0}};
+`ifdef CFG_BUS_ERRORS_ENABLED
+        bus_error_d <= `FALSE;
+`endif
+    end
+    else
+    begin
+        if (stall_d == `FALSE)
+        begin
+            instruction_d <= instruction_f;
+`ifdef CFG_BUS_ERRORS_ENABLED
+            bus_error_d <= bus_error_f;                 // Bus error flag travels with the instruction it belongs to
+`endif
+        end
+    end
+end  
+  
+endmodule
diff --git a/verilog/lm32/lm32_interrupt.v b/verilog/lm32/lm32_interrupt.v
new file mode 100644 (file)
index 0000000..41f9a14
--- /dev/null
@@ -0,0 +1,356 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_interrupt.v
+// Title            : Interrupt logic
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_interrupt (   // Holds the IE/IM/IP interrupt CSRs and requests an interrupt exception from the pipeline
+    // ----- Inputs -------
+    clk_i, 
+    rst_i,
+    // From external devices
+    interrupt,
+    // From pipeline
+    stall_x,
+`ifdef CFG_DEBUG_ENABLED
+    non_debug_exception,
+    debug_exception,
+`else
+    exception,
+`endif
+    eret_q_x,
+`ifdef CFG_DEBUG_ENABLED
+    bret_q_x,
+`endif
+    csr,
+    csr_write_data,
+    csr_write_enable,
+    // ----- Outputs -------
+    interrupt_exception,
+    // To pipeline
+    csr_read_data
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter interrupts = `CFG_INTERRUPTS;         // Number of interrupts (width of interrupt, ip and im)
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                    // Clock
+input rst_i;                                    // Reset
+
+input [interrupts-1:0] interrupt;               // Interrupt pins; a pin reading 1 sets its IP bit (see 'asserted'). NOTE(review): this comment used to say active-low - confirm polarity with the integrator
+
+input stall_x;                                  // Stall X pipeline stage
+
+`ifdef CFG_DEBUG_ENABLED
+input non_debug_exception;                      // Non-debug related exception has been raised
+input debug_exception;                          // Debug-related exception has been raised
+`else
+input exception;                                // Exception has been raised
+`endif
+input eret_q_x;                                 // Return from exception 
+`ifdef CFG_DEBUG_ENABLED
+input bret_q_x;                                 // Return from breakpoint 
+`endif
+
+input [`LM32_CSR_RNG] csr;                      // CSR read/write index
+input [`LM32_WORD_RNG] csr_write_data;          // Data to write to specified CSR
+input csr_write_enable;                         // CSR write enable
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output interrupt_exception;                     // Request to raise an interrupt exception
+wire   interrupt_exception;
+
+output [`LM32_WORD_RNG] csr_read_data;          // Data read from CSR
+reg    [`LM32_WORD_RNG] csr_read_data;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+wire [interrupts-1:0] asserted;                 // Interrupts that are pending already or whose pin currently reads 1 (ip | interrupt)
+//pragma attribute asserted preserve_signal true
+wire [interrupts-1:0] interrupt_n_exception;    // Pending interrupts that are also unmasked (ip & im)
+
+// Interrupt CSRs
+
+reg ie;                                         // Interrupt enable
+reg eie;                                        // Exception interrupt enable (copy of ie saved on an exception, restored by eret)
+`ifdef CFG_DEBUG_ENABLED
+reg bie;                                        // Breakpoint interrupt enable (copy of ie saved on a debug exception, restored by bret)
+`endif
+reg [interrupts-1:0] ip;                        // Interrupt pending
+reg [interrupts-1:0] im;                        // Interrupt mask
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+// Determine which interrupts have occurred and are unmasked
+assign interrupt_n_exception = ip & im;
+
+// Request an interrupt exception when any unmasked interrupt is pending
+// and the global interrupt enable (ie) is set
+assign interrupt_exception = (|interrupt_n_exception) & ie;
+
+// Determine which interrupts are currently being asserted or are already pending.
+// An interrupt pin that reads 1 sets its bit here; a bit already set in ip is
+// kept, so a pending interrupt stays pending until its IP bit is cleared.
+assign asserted = ip | interrupt;
+
+// NOTE: no ie_csr_read_data / ip_csr_read_data / im_csr_read_data nets are
+// generated here. They were never declared (so they were implicit 1-bit nets
+// that truncated the assigned value) and nothing in this module read them;
+// the CSR read mux below builds the same values directly.
+generate
+    if (interrupts > 1) 
+    begin
+// CSR read (more than one interrupt line): IE, IP and IM are readable; any other index yields all-x
+always @(*)
+begin
+    case (csr)
+    `LM32_CSR_IE:  csr_read_data = {{`LM32_WORD_WIDTH-3{1'b0}},   // Upper bits read as zero
+`ifdef CFG_DEBUG_ENABLED
+                                    bie,                          // Bit 2
+`else
+                                    1'b0,                         // Bit 2 reads as zero without debug support
+`endif
+                                    eie,                          // Bit 1
+                                    ie                            // Bit 0
+                                   };
+    `LM32_CSR_IP:  csr_read_data = ip;
+    `LM32_CSR_IM:  csr_read_data = im;
+    default:       csr_read_data = {`LM32_WORD_WIDTH{1'bx}};      // Don't-care for CSRs not handled by this module
+    endcase
+end
+    end
+    else
+    begin
+// CSR read (single interrupt line): same as above except that IM is not decoded and falls into the all-x default
+always @(*)
+begin
+    case (csr)
+    `LM32_CSR_IE:  csr_read_data = {{`LM32_WORD_WIDTH-3{1'b0}},   // Upper bits read as zero
+`ifdef CFG_DEBUG_ENABLED
+                                    bie,                          // Bit 2
+`else
+                                    1'b0,                         // Bit 2 reads as zero without debug support
+`endif
+                                    eie,                          // Bit 1
+                                    ie                            // Bit 0
+                                   };
+    `LM32_CSR_IP:  csr_read_data = ip;
+    default:       csr_read_data = {`LM32_WORD_WIDTH{1'bx}};      // Don't-care for CSRs not handled by this module
+    endcase
+end
+    end
+endgenerate
+    
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+generate
+    if (interrupts > 1)
+    begin
+// IE, IM, IP - Interrupt Enable, Interrupt Mask and Interrupt Pending CSRs (variant for more than one interrupt line)
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        ie <= `FALSE;
+        eie <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        bie <= `FALSE;
+`endif
+        im <= {interrupts{1'b0}};
+        ip <= {interrupts{1'b0}};
+    end
+    else
+    begin
+        // Set IP bit when interrupt line is asserted (default update; the IP write further down overrides it in the same cycle)
+        ip <= asserted;
+`ifdef CFG_DEBUG_ENABLED
+        if (non_debug_exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            eie <= ie;
+            ie <= `FALSE;
+        end
+        else if (debug_exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            bie <= ie;
+            ie <= `FALSE;
+        end
+`else
+        if (exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            eie <= ie;
+            ie <= `FALSE;
+        end
+`endif
+        else if (stall_x == `FALSE)   // Exceptions take priority; returns and CSR writes only act when X is not stalled
+        begin
+            if (eret_q_x == `TRUE)
+                // Restore interrupt enable
+                ie <= eie;          
+`ifdef CFG_DEBUG_ENABLED
+            else if (bret_q_x == `TRUE)
+                // Restore interrupt enable
+                ie <= bie;
+`endif
+            else if (csr_write_enable == `TRUE)
+            begin
+                // Handle wcsr write
+                if (csr == `LM32_CSR_IE)
+                begin
+                    ie <= csr_write_data[0];
+                    eie <= csr_write_data[1];
+`ifdef CFG_DEBUG_ENABLED
+                    bie <= csr_write_data[2];
+`endif
+                end
+                if (csr == `LM32_CSR_IM)
+                    im <= csr_write_data[interrupts-1:0];
+                if (csr == `LM32_CSR_IP)
+                    ip <= asserted & ~csr_write_data[interrupts-1:0];   // Writing 1 to a bit clears that pending bit; bits written as 0 keep 'asserted'
+            end
+        end
+    end
+end
+    end
+else
+    begin
+// IE, IP - single interrupt line variant. NOTE(review): 'im' is never reset or written here although interrupt_exception uses ip & im - confirm this is intended
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        ie <= `FALSE;
+        eie <= `FALSE;
+`ifdef CFG_DEBUG_ENABLED
+        bie <= `FALSE;
+`endif
+        ip <= {interrupts{1'b0}};
+    end
+    else
+    begin
+        // Set IP bit when interrupt line is asserted (default update; the IP write further down overrides it in the same cycle)
+        ip <= asserted;
+`ifdef CFG_DEBUG_ENABLED
+        if (non_debug_exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            eie <= ie;
+            ie <= `FALSE;
+        end
+        else if (debug_exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            bie <= ie;
+            ie <= `FALSE;
+        end
+`else
+        if (exception == `TRUE)
+        begin
+            // Save and then clear interrupt enable
+            eie <= ie;
+            ie <= `FALSE;
+        end
+`endif
+        else if (stall_x == `FALSE)   // Exceptions take priority; returns and CSR writes only act when X is not stalled
+        begin
+            if (eret_q_x == `TRUE)
+                // Restore interrupt enable
+                ie <= eie;          
+`ifdef CFG_DEBUG_ENABLED
+            else if (bret_q_x == `TRUE)
+                // Restore interrupt enable
+                ie <= bie;
+`endif
+            else if (csr_write_enable == `TRUE)
+            begin
+                // Handle wcsr write
+                if (csr == `LM32_CSR_IE)
+                begin
+                    ie <= csr_write_data[0];
+                    eie <= csr_write_data[1];
+`ifdef CFG_DEBUG_ENABLED
+                    bie <= csr_write_data[2];
+`endif
+                end
+                if (csr == `LM32_CSR_IP)
+                    ip <= asserted & ~csr_write_data[interrupts-1:0];   // Writing 1 to a bit clears that pending bit; bits written as 0 keep 'asserted'
+            end
+        end
+    end
+end
+    end
+endgenerate
+
+endmodule
+
diff --git a/verilog/lm32/lm32_jtag.v b/verilog/lm32/lm32_jtag.v
new file mode 100644 (file)
index 0000000..1904ccb
--- /dev/null
@@ -0,0 +1,498 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_jtag.v
+// Title            : JTAG interface
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+`ifdef CFG_JTAG_ENABLED
+
+`define LM32_DP                             3'b000
+`define LM32_TX                             3'b001
+`define LM32_RX                             3'b010
+
+// LM32 Debug Protocol command IDs (decoded from rx_byte[7:4] when the JTAG register address is LM32_DP)
+`define LM32_DP_RNG                         3:0
+`define LM32_DP_READ_MEMORY                 4'b0001
+`define LM32_DP_WRITE_MEMORY                4'b0010
+`define LM32_DP_READ_SEQUENTIAL             4'b0011
+`define LM32_DP_WRITE_SEQUENTIAL            4'b0100
+`define LM32_DP_WRITE_CSR                   4'b0101
+`define LM32_DP_BREAK                       4'b0110
+`define LM32_DP_RESET                       4'b0111
+
+// States for the debug protocol FSM (held in the 'state' register of lm32_jtag)
+`define LM32_JTAG_STATE_RNG                 3:0
+`define LM32_JTAG_STATE_READ_COMMAND        4'h0
+`define LM32_JTAG_STATE_READ_BYTE_0         4'h1
+`define LM32_JTAG_STATE_READ_BYTE_1         4'h2
+`define LM32_JTAG_STATE_READ_BYTE_2         4'h3
+`define LM32_JTAG_STATE_READ_BYTE_3         4'h4
+`define LM32_JTAG_STATE_READ_BYTE_4         4'h5
+`define LM32_JTAG_STATE_PROCESS_COMMAND     4'h6
+`define LM32_JTAG_STATE_WAIT_FOR_MEMORY     4'h7
+`define LM32_JTAG_STATE_WAIT_FOR_CSR        4'h8
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_jtag (   // Bridges the JTAG data/address registers to the CPU: debug protocol commands and optional JTAG UART
+    // ----- Inputs -------
+    clk_i,
+    rst_i,
+    jtag_clk, 
+    jtag_update,
+    jtag_reg_q,
+    jtag_reg_addr_q,
+`ifdef CFG_JTAG_UART_ENABLED
+    csr,
+    csr_write_enable,
+    csr_write_data,
+    stall_x,
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    jtag_read_data,
+    jtag_access_complete,
+`endif
+`ifdef CFG_DEBUG_ENABLED
+    exception_q_w,
+`endif
+    // ----- Outputs -------
+`ifdef CFG_JTAG_UART_ENABLED
+    jtx_csr_read_data,
+    jrx_csr_read_data,
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+    jtag_csr_write_enable,
+    jtag_csr_write_data,
+    jtag_csr,
+    jtag_read_enable,
+    jtag_write_enable,
+    jtag_write_data,
+    jtag_address,
+`endif
+`ifdef CFG_DEBUG_ENABLED
+    jtag_break,
+    jtag_reset,
+`endif
+    jtag_reg_d,
+    jtag_reg_addr_d
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                            // Clock
+input rst_i;                                            // Reset
+
+input jtag_clk;                                         // JTAG clock (not referenced by the logic in this module)
+input jtag_update;                                      // JTAG data register has been updated (falling edge toggles rx_toggle)
+input [`LM32_BYTE_RNG] jtag_reg_q;                      // JTAG data register
+input [2:0] jtag_reg_addr_q;                            // JTAG address register (copied to rx_addr; selects LM32_DP / LM32_TX / LM32_RX handling)
+
+`ifdef CFG_JTAG_UART_ENABLED
+input [`LM32_CSR_RNG] csr;                              // CSR to write
+input csr_write_enable;                                 // CSR write enable
+input [`LM32_WORD_RNG] csr_write_data;                  // Data to write to specified CSR
+input stall_x;                                          // Stall instruction in X stage
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+input [`LM32_BYTE_RNG] jtag_read_data;                  // Data read from requested address
+input jtag_access_complete;                             // Memory access is complete
+`endif
+`ifdef CFG_DEBUG_ENABLED
+input exception_q_w;                                    // Indicates an exception has occurred in W stage
+`endif
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+       
+`ifdef CFG_JTAG_UART_ENABLED
+output [`LM32_WORD_RNG] jtx_csr_read_data;              // Value of JTX CSR for rcsr instructions
+wire   [`LM32_WORD_RNG] jtx_csr_read_data;
+output [`LM32_WORD_RNG] jrx_csr_read_data;              // Value of JRX CSR for rcsr instructions
+wire   [`LM32_WORD_RNG] jrx_csr_read_data;
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+output jtag_csr_write_enable;                           // CSR write enable
+reg    jtag_csr_write_enable;
+output [`LM32_WORD_RNG] jtag_csr_write_data;            // Data to write to specified CSR
+wire   [`LM32_WORD_RNG] jtag_csr_write_data;
+output [`LM32_CSR_RNG] jtag_csr;                        // CSR to write
+wire   [`LM32_CSR_RNG] jtag_csr;
+output jtag_read_enable;                                // Memory read enable
+reg    jtag_read_enable;
+output jtag_write_enable;                               // Memory write enable
+reg    jtag_write_enable;
+output [`LM32_BYTE_RNG] jtag_write_data;                // Data to write to specified address
+wire   [`LM32_BYTE_RNG] jtag_write_data;        
+output [`LM32_WORD_RNG] jtag_address;                   // Memory read/write address
+wire   [`LM32_WORD_RNG] jtag_address;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+output jtag_break;                                      // Request to raise a breakpoint exception
+reg    jtag_break;
+output jtag_reset;                                      // Request to raise a reset exception
+reg    jtag_reset;
+`endif
+output [`LM32_BYTE_RNG] jtag_reg_d;                     // Byte presented to the JTAG side (UART TX byte or memory read data)
+reg    [`LM32_BYTE_RNG] jtag_reg_d;
+output [2:0] jtag_reg_addr_d;                           // Status flags presented to the JTAG side: {processing, uart_rx_valid, uart_tx_valid}
+wire   [2:0] jtag_reg_addr_d;
+             
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+reg rx_toggle;                          // Clock-domain crossing registers
+reg rx_toggle_r;                        // Registered version of rx_toggle
+reg rx_toggle_r_r;                      // Registered version of rx_toggle_r
+reg rx_toggle_r_r_r;                    // Registered version of rx_toggle_r_r
+
+reg [`LM32_BYTE_RNG] rx_byte;           // Combinational copy of jtag_reg_q
+reg [2:0] rx_addr;                      // Combinational copy of jtag_reg_addr_q
+
+`ifdef CFG_JTAG_UART_ENABLED                 
+reg [`LM32_BYTE_RNG] uart_tx_byte;      // UART TX data
+reg uart_tx_valid;                      // TX data is valid
+reg [`LM32_BYTE_RNG] uart_rx_byte;      // UART RX data
+reg uart_rx_valid;                      // RX data is valid
+`endif
+
+reg [`LM32_DP_RNG] command;             // The last received command
+`ifdef CFG_HW_DEBUG_ENABLED
+reg [`LM32_BYTE_RNG] jtag_byte_0;       // Registers to hold command parameters
+reg [`LM32_BYTE_RNG] jtag_byte_1;
+reg [`LM32_BYTE_RNG] jtag_byte_2;
+reg [`LM32_BYTE_RNG] jtag_byte_3;
+reg [`LM32_BYTE_RNG] jtag_byte_4;
+reg processing;                         // Indicates if we're still processing a memory read/write
+`endif
+
+reg [`LM32_JTAG_STATE_RNG] state;       // Current state of FSM
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+`ifdef CFG_HW_DEBUG_ENABLED
+assign jtag_csr_write_data = {jtag_byte_0, jtag_byte_1, jtag_byte_2, jtag_byte_3};   // jtag_byte_0 is the most significant byte
+assign jtag_csr = jtag_byte_4[`LM32_CSR_RNG];                                         // CSR index comes from the fifth parameter byte
+assign jtag_address = {jtag_byte_0, jtag_byte_1, jtag_byte_2, jtag_byte_3};          // Same four bytes double as the memory address
+assign jtag_write_data = jtag_byte_4;                                                 // Memory write data is the fifth parameter byte
+`endif
+                 
+// Generate status flags for reading via the JTAG interface (flags read as zero when the feature is compiled out)
+`ifdef CFG_JTAG_UART_ENABLED                 
+assign jtag_reg_addr_d[1:0] = {uart_rx_valid, uart_tx_valid};         
+`else
+assign jtag_reg_addr_d[1:0] = 2'b00;
+`endif
+`ifdef CFG_HW_DEBUG_ENABLED
+assign jtag_reg_addr_d[2] = processing;
+`else
+assign jtag_reg_addr_d[2] = 1'b0;
+`endif
+
+`ifdef CFG_JTAG_UART_ENABLED                 
+assign jtx_csr_read_data = {{`LM32_WORD_WIDTH-9{1'b0}}, uart_tx_valid, 8'h00};           // Bit 8 = TX byte still valid; low byte reads as zero
+assign jrx_csr_read_data = {{`LM32_WORD_WIDTH-9{1'b0}}, uart_rx_valid, uart_rx_byte};    // Bit 8 = RX byte valid; low byte = received byte
+`endif         
+                 
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// Toggle a flag when a JTAG write occurs (rx_toggle flips on every falling edge of jtag_update)
+always @(negedge jtag_update `CFG_RESET_SENSITIVITY)
+begin
+if (rst_i == `TRUE)
+  rx_toggle <= 1'b0;
+else 
+  rx_toggle <= ~rx_toggle;
+end
+
+always @(*)   // Plain pass-through: rx_byte / rx_addr are unregistered copies of the JTAG data and address registers
+begin
+    rx_byte = jtag_reg_q;
+    rx_addr = jtag_reg_addr_q;
+end
+
+// Clock domain crossing from JTAG clock domain to CPU clock domain: rx_toggle is passed through three clk_i registers
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        rx_toggle_r <= 1'b0;
+        rx_toggle_r_r <= 1'b0;
+        rx_toggle_r_r_r <= 1'b0;
+    end
+    else
+    begin
+        rx_toggle_r <= rx_toggle;
+        rx_toggle_r_r <= rx_toggle_r;
+        rx_toggle_r_r_r <= rx_toggle_r_r;   // The FSM treats rx_toggle_r_r != rx_toggle_r_r_r as "new byte received"
+    end
+end
+
+// LM32 debug protocol state machine.
+// Runs on clk_i. A new byte from the JTAG side is signalled by rx_toggle_r_r
+// differing from rx_toggle_r_r_r; rx_addr then selects between a debug
+// protocol command (LM32_DP) and the JTAG UART (LM32_TX / LM32_RX).
+// Within one clock edge the last non-blocking assignment to a register wins,
+// so assignments made inside the state case override the CSR-write and
+// exception handling that precede it.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        state <= `LM32_JTAG_STATE_READ_COMMAND;
+        command <= 4'b0000;
+        jtag_reg_d <= 8'h00;
+`ifdef CFG_HW_DEBUG_ENABLED
+        processing <= `FALSE;
+        jtag_csr_write_enable <= `FALSE;
+        jtag_read_enable <= `FALSE;
+        jtag_write_enable <= `FALSE;
+`endif
+`ifdef CFG_DEBUG_ENABLED
+        jtag_break <= `FALSE;
+        jtag_reset <= `FALSE;
+`endif
+`ifdef CFG_JTAG_UART_ENABLED                 
+        uart_tx_byte <= 8'h00;
+        uart_tx_valid <= `FALSE;
+        uart_rx_byte <= 8'h00;
+        uart_rx_valid <= `FALSE;
+`endif
+    end
+    else
+    begin
+`ifdef CFG_JTAG_UART_ENABLED                 
+        // CPU-side accesses to the JTAG UART CSRs (only when X is not stalled)
+        if ((csr_write_enable == `TRUE) && (stall_x == `FALSE))
+        begin
+            case (csr)
+            `LM32_CSR_JTX:
+            begin
+                // Set flag indicating data is available
+                uart_tx_byte <= csr_write_data[`LM32_BYTE_0_RNG];
+                uart_tx_valid <= `TRUE;
+            end
+            `LM32_CSR_JRX:
+            begin
+                // Clear flag indicating data has been received
+                uart_rx_valid <= `FALSE;
+            end
+            endcase
+        end
+`endif
+`ifdef CFG_DEBUG_ENABLED
+        // When an exception has occurred, clear the requests
+        if (exception_q_w == `TRUE)
+        begin
+            jtag_break <= `FALSE;
+            jtag_reset <= `FALSE;
+        end
+`endif
+        case (state)
+        `LM32_JTAG_STATE_READ_COMMAND:
+        begin
+            // Wait for rx register to toggle which indicates new data is available
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                // The command ID is the upper nibble of the received byte
+                command <= rx_byte[7:4];                
+                case (rx_addr)
+`ifdef CFG_DEBUG_ENABLED
+                `LM32_DP:
+                begin
+                    case (rx_byte[7:4])
+`ifdef CFG_HW_DEBUG_ENABLED
+                    `LM32_DP_READ_MEMORY:
+                        state <= `LM32_JTAG_STATE_READ_BYTE_0;
+                    `LM32_DP_READ_SEQUENTIAL:
+                    begin
+                        // Re-use the previous address, incrementing only the
+                        // {jtag_byte_2, jtag_byte_3} part (no carry into jtag_byte_1)
+                        {jtag_byte_2, jtag_byte_3} <= {jtag_byte_2, jtag_byte_3} + 1'b1;
+                        state <= `LM32_JTAG_STATE_PROCESS_COMMAND;
+                    end
+                    `LM32_DP_WRITE_MEMORY:
+                        state <= `LM32_JTAG_STATE_READ_BYTE_0;
+                    `LM32_DP_WRITE_SEQUENTIAL:
+                    begin
+                        // Same address increment as above; only the data byte
+                        // (jtag_byte_4) still has to be received
+                        {jtag_byte_2, jtag_byte_3} <= {jtag_byte_2, jtag_byte_3} + 1'b1;
+                        state <= `LM32_JTAG_STATE_READ_BYTE_4;
+                    end
+                    `LM32_DP_WRITE_CSR:
+                        state <= `LM32_JTAG_STATE_READ_BYTE_0;
+`endif                    
+                    `LM32_DP_BREAK:
+                    begin
+`ifdef CFG_JTAG_UART_ENABLED     
+                        uart_rx_valid <= `FALSE;    
+                        uart_tx_valid <= `FALSE;         
+`endif
+                        jtag_break <= `TRUE;
+                    end
+                    `LM32_DP_RESET:
+                    begin
+`ifdef CFG_JTAG_UART_ENABLED     
+                        uart_rx_valid <= `FALSE;    
+                        uart_tx_valid <= `FALSE;         
+`endif
+                        jtag_reset <= `TRUE;
+                    end
+                    endcase                               
+                end
+`endif
+`ifdef CFG_JTAG_UART_ENABLED                 
+                `LM32_TX:
+                begin
+                    // Byte sent by the JTAG side becomes the CPU's RX data
+                    uart_rx_byte <= rx_byte;
+                    uart_rx_valid <= `TRUE;
+                end                    
+                `LM32_RX:
+                begin
+                    // Hand the CPU's TX byte to the JTAG side and mark it consumed
+                    jtag_reg_d <= uart_tx_byte;
+                    uart_tx_valid <= `FALSE;
+                end
+`endif
+                default:
+                    ;
+                endcase                
+            end
+        end
+`ifdef CFG_HW_DEBUG_ENABLED
+        // READ_BYTE_0..3 collect the four address / CSR data bytes, most
+        // significant first; READ_BYTE_4 collects the write data / CSR index
+        `LM32_JTAG_STATE_READ_BYTE_0:
+        begin
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                jtag_byte_0 <= rx_byte;
+                state <= `LM32_JTAG_STATE_READ_BYTE_1;
+            end
+        end
+        `LM32_JTAG_STATE_READ_BYTE_1:
+        begin
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                jtag_byte_1 <= rx_byte;
+                state <= `LM32_JTAG_STATE_READ_BYTE_2;
+            end
+        end
+        `LM32_JTAG_STATE_READ_BYTE_2:
+        begin
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                jtag_byte_2 <= rx_byte;
+                state <= `LM32_JTAG_STATE_READ_BYTE_3;
+            end
+        end
+        `LM32_JTAG_STATE_READ_BYTE_3:
+        begin
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                jtag_byte_3 <= rx_byte;
+                // A memory read needs no fifth byte
+                if (command == `LM32_DP_READ_MEMORY)
+                    state <= `LM32_JTAG_STATE_PROCESS_COMMAND;
+                else 
+                    state <= `LM32_JTAG_STATE_READ_BYTE_4;
+            end
+        end
+        `LM32_JTAG_STATE_READ_BYTE_4:
+        begin
+            if (rx_toggle_r_r != rx_toggle_r_r_r)
+            begin
+                jtag_byte_4 <= rx_byte;
+                state <= `LM32_JTAG_STATE_PROCESS_COMMAND;
+            end
+        end
+        `LM32_JTAG_STATE_PROCESS_COMMAND:
+        begin
+            // Start the requested access and flag it via 'processing'
+            case (command)
+            `LM32_DP_READ_MEMORY,
+            `LM32_DP_READ_SEQUENTIAL:
+            begin
+                jtag_read_enable <= `TRUE;
+                processing <= `TRUE;
+                state <= `LM32_JTAG_STATE_WAIT_FOR_MEMORY;
+            end
+            `LM32_DP_WRITE_MEMORY,
+            `LM32_DP_WRITE_SEQUENTIAL:
+            begin
+                jtag_write_enable <= `TRUE;
+                processing <= `TRUE;
+                state <= `LM32_JTAG_STATE_WAIT_FOR_MEMORY;
+            end
+            `LM32_DP_WRITE_CSR:
+            begin
+                jtag_csr_write_enable <= `TRUE;
+                processing <= `TRUE;
+                state <= `LM32_JTAG_STATE_WAIT_FOR_CSR;
+            end
+            endcase
+        end
+        `LM32_JTAG_STATE_WAIT_FOR_MEMORY:
+        begin
+            // Hold the enables until the access is reported complete, then
+            // latch jtag_read_data (for reads and writes alike)
+            if (jtag_access_complete == `TRUE)
+            begin          
+                jtag_read_enable <= `FALSE;
+                jtag_reg_d <= jtag_read_data;
+                jtag_write_enable <= `FALSE;  
+                processing <= `FALSE;
+                state <= `LM32_JTAG_STATE_READ_COMMAND;
+            end
+        end    
+        `LM32_JTAG_STATE_WAIT_FOR_CSR:
+        begin
+            // The CSR write enable is a single-cycle pulse
+            jtag_csr_write_enable <= `FALSE;
+            processing <= `FALSE;
+            state <= `LM32_JTAG_STATE_READ_COMMAND;
+        end    
+`endif
+        endcase
+    end
+end
+  
+endmodule
+
+`endif
diff --git a/verilog/lm32/lm32_load_store_unit.v b/verilog/lm32/lm32_load_store_unit.v
new file mode 100644 (file)
index 0000000..4a86e7b
--- /dev/null
@@ -0,0 +1,829 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project      : LatticeMico32
+// File         : lm32_load_store_unit.v
+// Title        : Load and store unit
+// Dependencies : lm32_include.v
+// Version      : 6.1.17
+//              : Initial Release
+// Version      : 7.0SP2, 3.0
+//              : No Change
+// Version      : 3.1
+//              : Instead of disallowing an instruction cache miss on a data cache 
+//              : miss, both can now occur at the same time. If both occur at same 
+//              : time, then restart address is the address of instruction that 
+//              : caused data cache miss.
+// Version      : 3.2
+//              : EBRs use SYNC resets instead of ASYNC resets.
+// Version      : 3.3
+//              : Support for new non-cacheable Data Memory that is accessible by 
+//              : the data port and has a one cycle access latency.
+// Version      : 3.4
+//              : No change
+// Version      : 3.5
+//              : Bug fix: Inline memory is correctly generated if it is not a
+//              : power-of-two
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_load_store_unit (
+    // ----- Inputs -------
+    clk_i,
+    rst_i,
+    // From pipeline
+    stall_a,
+    stall_x,
+    stall_m,
+    kill_x,
+    kill_m,
+    exception_m,
+    store_operand_x,
+    load_store_address_x,
+    load_store_address_m,
+    load_store_address_w,
+    load_x,
+    store_x,
+    load_q_x,
+    store_q_x,
+    load_q_m,
+    store_q_m,
+    sign_extend_x,
+    size_x,
+`ifdef CFG_DCACHE_ENABLED
+    dflush,
+`endif
+`ifdef CFG_IROM_ENABLED
+    irom_data_m,
+`endif
+    // From Wishbone
+    d_dat_i,
+    d_ack_i,
+    d_err_i,
+    d_rty_i,
+    // ----- Outputs -------
+    // To pipeline
+`ifdef CFG_DCACHE_ENABLED
+    dcache_refill_request,
+    dcache_restart_request,
+    dcache_stall_request,
+    dcache_refilling,
+`endif    
+`ifdef CFG_IROM_ENABLED
+    irom_store_data_m,
+    irom_address_xm,
+    irom_we_xm,
+    irom_stall_request_x,
+`endif                      
+    load_data_w,
+    stall_wb_load,
+    // To Wishbone
+    d_dat_o,
+    d_adr_o,
+    d_cyc_o,
+    d_sel_o,
+    d_stb_o,
+    d_we_o,
+    d_cti_o,
+    d_lock_o,
+    d_bte_o
+    );
+
+/////////////////////////////////////////////////////
+// Parameters
+/////////////////////////////////////////////////////
+
+parameter associativity = 1;                            // Associativity of the cache (Number of ways)
+parameter sets = 512;                                   // Number of sets
+parameter bytes_per_line = 16;                          // Number of bytes per cache line
+parameter base_address = 0;                             // Base address of cachable memory
+parameter limit = 0;                                    // Limit (highest address) of cachable memory
+
+// For bytes_per_line == 4, we set 1 so part-select range isn't reversed, even though not really used 
+localparam addr_offset_width = bytes_per_line == 4 ? 1 : clogb2(bytes_per_line)-1-2;
+localparam addr_offset_lsb = 2;
+localparam addr_offset_msb = (addr_offset_lsb+addr_offset_width-1);
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                            // Clock 
+input rst_i;                                            // Reset
+
+input stall_a;                                          // A stage stall 
+input stall_x;                                          // X stage stall        
+input stall_m;                                          // M stage stall
+input kill_x;                                           // Kill instruction in X stage
+input kill_m;                                           // Kill instruction in M stage
+input exception_m;                                      // An exception occurred in the M stage
+
+input [`LM32_WORD_RNG] store_operand_x;                 // Data read from register to store
+input [`LM32_WORD_RNG] load_store_address_x;            // X stage load/store address
+input [`LM32_WORD_RNG] load_store_address_m;            // M stage load/store address
+input [1:0] load_store_address_w;                       // W stage load/store address (only least two significant bits are needed)
+input load_x;                                           // Load instruction in X stage
+input store_x;                                          // Store instruction in X stage
+input load_q_x;                                         // Load instruction in X stage
+input store_q_x;                                        // Store instruction in X stage
+input load_q_m;                                         // Load instruction in M stage
+input store_q_m;                                        // Store instruction in M stage
+input sign_extend_x;                                    // Whether load instruction in X stage should sign extend or zero extend
+input [`LM32_SIZE_RNG] size_x;                          // Size of load or store (byte, hword, word)
+
+`ifdef CFG_DCACHE_ENABLED
+input dflush;                                           // Flush the data cache
+`endif
+
+`ifdef CFG_IROM_ENABLED   
+input [`LM32_WORD_RNG] irom_data_m;                     // Data from Instruction-ROM
+`endif
+
+input [`LM32_WORD_RNG] d_dat_i;                         // Data Wishbone interface read data
+input d_ack_i;                                          // Data Wishbone interface acknowledgement
+input d_err_i;                                          // Data Wishbone interface error
+input d_rty_i;                                          // Data Wishbone interface retry
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+`ifdef CFG_DCACHE_ENABLED
+// Data cache status outputs; each one is wired straight to the lm32_dcache instance in this module.
+output dcache_refill_request;                           // Request to refill data cache
+wire   dcache_refill_request;
+output dcache_restart_request;                          // Request to restart the instruction that caused a data cache miss
+wire   dcache_restart_request;
+output dcache_stall_request;                            // Data cache stall request
+wire   dcache_stall_request;
+output dcache_refilling;                                // Data cache refill in progress (the dcache instance's .refilling output)
+wire   dcache_refilling;
+`endif
+
+`ifdef CFG_IROM_ENABLED   
+// Instruction-ROM data-port outputs (only present when the IROM is configured in).
+// The port range of irom_store_data_m must match its net declaration: the
+// byte-merge assigns in this module drive every byte lane of the word.
+output [`LM32_WORD_RNG] irom_store_data_m;              // Store data to Instruction ROM
+wire   [`LM32_WORD_RNG] irom_store_data_m;
+output [`LM32_WORD_RNG] irom_address_xm;                // Load/store address to Instruction ROM
+wire   [`LM32_WORD_RNG] irom_address_xm;
+output irom_we_xm;                                      // Write-enable of 2nd port of Instruction ROM
+wire   irom_we_xm;
+output irom_stall_request_x;                            // Stall instruction in D stage  
+wire   irom_stall_request_x;                            
+`endif
+   
+output [`LM32_WORD_RNG] load_data_w;                    // Result of a load instruction
+reg    [`LM32_WORD_RNG] load_data_w;
+output stall_wb_load;                                   // Request to stall pipeline due to a load from the Wishbone interface
+reg    stall_wb_load;
+
+output [`LM32_WORD_RNG] d_dat_o;                        // Data Wishbone interface write data
+reg    [`LM32_WORD_RNG] d_dat_o;
+output [`LM32_WORD_RNG] d_adr_o;                        // Data Wishbone interface address
+reg    [`LM32_WORD_RNG] d_adr_o;
+output d_cyc_o;                                         // Data Wishbone interface cycle
+reg    d_cyc_o;
+output [`LM32_BYTE_SELECT_RNG] d_sel_o;                 // Data Wishbone interface byte select
+reg    [`LM32_BYTE_SELECT_RNG] d_sel_o;
+output d_stb_o;                                         // Data Wishbone interface strobe
+reg    d_stb_o; 
+output d_we_o;                                          // Data Wishbone interface write enable
+reg    d_we_o;
+output [`LM32_CTYPE_RNG] d_cti_o;                       // Data Wishbone interface cycle type 
+reg    [`LM32_CTYPE_RNG] d_cti_o;
+output d_lock_o;                                        // Data Wishbone interface lock bus
+reg    d_lock_o;
+output [`LM32_BTYPE_RNG] d_bte_o;                       // Data Wishbone interface burst type 
+wire   [`LM32_BTYPE_RNG] d_bte_o;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+// Microcode pipeline registers - See inputs for description
+reg [`LM32_SIZE_RNG] size_m;
+reg [`LM32_SIZE_RNG] size_w;
+reg sign_extend_m;
+reg sign_extend_w;
+reg [`LM32_WORD_RNG] store_data_x;       
+reg [`LM32_WORD_RNG] store_data_m;       
+reg [`LM32_BYTE_SELECT_RNG] byte_enable_x;
+reg [`LM32_BYTE_SELECT_RNG] byte_enable_m;
+wire [`LM32_WORD_RNG] data_m;
+reg [`LM32_WORD_RNG] data_w;
+
+`ifdef CFG_DCACHE_ENABLED
+wire dcache_select_x;                                   // Select data cache to load from / store to
+reg dcache_select_m;
+wire [`LM32_WORD_RNG] dcache_data_m;                    // Data read from cache
+wire [`LM32_WORD_RNG] dcache_refill_address;            // Address to refill data cache from
+reg dcache_refill_ready;                                // Indicates the next word of refill data is ready
+wire [`LM32_CTYPE_RNG] first_cycle_type;                // First Wishbone cycle type
+wire [`LM32_CTYPE_RNG] next_cycle_type;                 // Next Wishbone cycle type
+wire last_word;                                         // Indicates if this is the last word in the cache line
+wire [`LM32_WORD_RNG] first_address;                    // First cache refill address
+`endif
+`ifdef CFG_DRAM_ENABLED
+wire dram_select_x;                                     // Select data RAM to load from / store to
+reg dram_select_m;
+reg dram_bypass_en;                                     // RAW in data RAM; read latched (bypass) value rather than value from memory
+reg [`LM32_WORD_RNG] dram_bypass_data;                  // Latched value of store'd data to data RAM
+wire [`LM32_WORD_RNG] dram_data_out;                    // Data read from data RAM
+wire [`LM32_WORD_RNG] dram_data_m;                      // Data read from data RAM: bypass value or value from memory
+wire [`LM32_WORD_RNG] dram_store_data_m;                // Data to write to RAM
+`endif
+wire wb_select_x;                                       // Select Wishbone to load from / store to
+`ifdef CFG_IROM_ENABLED
+wire irom_select_x;                                     // Select instruction ROM to load from / store to
+reg  irom_select_m;
+`endif
+reg wb_select_m;
+reg [`LM32_WORD_RNG] wb_data_m;                         // Data read from Wishbone
+reg wb_load_complete;                                   // Indicates when a Wishbone load is complete
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+`ifdef CFG_DRAM_ENABLED
+   // Data RAM
+   pmi_ram_dp_true 
+     #(
+       // ----- Parameters -------
+       .pmi_family             (`LATTICE_FAMILY),
+
+       //.pmi_addr_depth_a       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_addr_width_a       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_data_width_a       (`LM32_WORD_WIDTH),
+       //.pmi_addr_depth_b       (1 << (clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_addr_width_b       ((clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)),
+       //.pmi_data_width_b       (`LM32_WORD_WIDTH),
+       
+       .pmi_addr_depth_a       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
+       .pmi_addr_width_a       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
+       .pmi_data_width_a       (`LM32_WORD_WIDTH),
+       .pmi_addr_depth_b       (`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1),
+       .pmi_addr_width_b       (clogb2_v1(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)),
+       .pmi_data_width_b       (`LM32_WORD_WIDTH),
+
+       .pmi_regmode_a          ("noreg"),
+       .pmi_regmode_b          ("noreg"),
+       .pmi_gsr                ("enable"),
+       .pmi_resetmode          ("sync"),
+       .pmi_init_file          (`CFG_DRAM_INIT_FILE),
+       .pmi_init_file_format   (`CFG_DRAM_INIT_FILE_FORMAT),
+       .module_type            ("pmi_ram_dp_true")
+       ) 
+       ram (
+           // ----- Inputs -------
+           .ClockA                 (clk_i),
+           .ClockB                 (clk_i),
+           .ResetA                 (rst_i),
+           .ResetB                 (rst_i),
+           .DataInA                ({32{1'b0}}),
+           .DataInB                (dram_store_data_m),
+           .AddressA               (load_store_address_x[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
+           .AddressB               (load_store_address_m[(clogb2(`CFG_DRAM_LIMIT/4-`CFG_DRAM_BASE_ADDRESS/4+1)-1)+2-1:2]),
+           // .ClockEnA               (!stall_x & (load_x | store_x)),
+           .ClockEnA               (!stall_x),
+           .ClockEnB               (!stall_m),
+           .WrA                    (`FALSE),
+           .WrB                    (store_q_m & dram_select_m), 
+           // ----- Outputs -------
+           .QA                     (dram_data_out),
+           .QB                     ()
+           );
+   
+   /*----------------------------------------------------------------------
+    EBRs cannot perform reads from location 'written to' on the same clock
+    edge. Therefore bypass logic is required to latch the store'd value
+    and use it for the load (instead of value from memory).
+    ----------------------------------------------------------------------*/
+   // Bypass register for read-after-write hazards on the data RAM.
+   // dram_bypass_data tracks the merged store word; dram_bypass_en marks the
+   // cycle in which that latched word must be used instead of the RAM output.
+   always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+     if (rst_i == `TRUE)
+       begin
+         dram_bypass_en <= `FALSE;
+         dram_bypass_data <= 0;
+       end
+     else
+       begin
+         // Capture the store word whenever the X stage advances
+         if (stall_x == `FALSE)
+           dram_bypass_data <= dram_store_data_m;
+         
+         // Arm the bypass when a store in M and a load/store in X target the
+         // same word (address bits [1:0] are ignored) and both stages advance.
+         // Note: this match is not qualified by dram_select_x/dram_select_m.
+         if (   (stall_m == `FALSE) 
+              && (stall_x == `FALSE)
+             && (store_q_m == `TRUE)
+             && (   (load_x == `TRUE)
+                 || (store_x == `TRUE)
+                )
+             && (load_store_address_x[(`LM32_WORD_WIDTH-1):2] == load_store_address_m[(`LM32_WORD_WIDTH-1):2])
+            )
+           dram_bypass_en <= `TRUE;
+         else
+           // Otherwise drop the bypass the next time the X stage advances
+           if (   (dram_bypass_en == `TRUE)
+               && (stall_x == `FALSE)
+              )
+             dram_bypass_en <= `FALSE;
+       end
+   
+   assign dram_data_m = dram_bypass_en ? dram_bypass_data : dram_data_out;
+`endif
+
+`ifdef CFG_DCACHE_ENABLED
+// Data cache
+lm32_dcache #(
+    .associativity          (associativity),
+    .sets                   (sets),
+    .bytes_per_line         (bytes_per_line),
+    .base_address           (base_address),
+    .limit                  (limit)
+    ) dcache ( 
+    // ----- Inputs -----
+    .clk_i                  (clk_i),
+    .rst_i                  (rst_i),      
+    .stall_a                (stall_a),
+    .stall_x                (stall_x),
+    .stall_m                (stall_m),
+    .address_x              (load_store_address_x),
+    .address_m              (load_store_address_m),
+    .load_q_m               (load_q_m & dcache_select_m),
+    .store_q_m              (store_q_m & dcache_select_m),
+    .store_data             (store_data_m),
+    .store_byte_select      (byte_enable_m & {4{dcache_select_m}}),
+    .refill_ready           (dcache_refill_ready),
+    .refill_data            (wb_data_m),
+    .dflush                 (dflush),
+    // ----- Outputs -----
+    .stall_request          (dcache_stall_request),
+    .restart_request        (dcache_restart_request),
+    .refill_request         (dcache_refill_request),
+    .refill_address         (dcache_refill_address),
+    .refilling              (dcache_refilling),
+    .load_data              (dcache_data_m)
+    );
+`endif
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+// Select where data should be loaded from / stored to
+`ifdef CFG_DRAM_ENABLED
+   assign dram_select_x =    (load_store_address_x >= `CFG_DRAM_BASE_ADDRESS) 
+                          && (load_store_address_x <= `CFG_DRAM_LIMIT);
+`endif
+
+`ifdef CFG_IROM_ENABLED
+   assign irom_select_x =    (load_store_address_x >= `CFG_IROM_BASE_ADDRESS) 
+                          && (load_store_address_x <= `CFG_IROM_LIMIT);
+`endif
+   
+`ifdef CFG_DCACHE_ENABLED
+   assign dcache_select_x =    (load_store_address_x >= `CFG_DCACHE_BASE_ADDRESS) 
+                            && (load_store_address_x <= `CFG_DCACHE_LIMIT)
+`ifdef CFG_DRAM_ENABLED
+                            && (dram_select_x == `FALSE)
+`endif
+`ifdef CFG_IROM_ENABLED
+                            && (irom_select_x == `FALSE)
+`endif
+                     ;
+`endif
+         
+   assign wb_select_x =    `TRUE
+`ifdef CFG_DCACHE_ENABLED
+                        && !dcache_select_x 
+`endif
+`ifdef CFG_DRAM_ENABLED
+                        && !dram_select_x
+`endif
+`ifdef CFG_IROM_ENABLED
+                        && !irom_select_x
+`endif
+                     ;
+
+// Make sure data to store is in correct byte lane.
+// Byte and halfword operands are replicated across the whole word, so the
+// value is present on every lane; byte_enable_x decides which lanes are used.
+always @(*)
+begin
+    case (size_x)
+    `LM32_SIZE_BYTE:  store_data_x = {4{store_operand_x[7:0]}};
+    `LM32_SIZE_HWORD: store_data_x = {2{store_operand_x[15:0]}};
+    `LM32_SIZE_WORD:  store_data_x = store_operand_x;    
+    // Any other size encoding drives X so it shows up in simulation
+    default:          store_data_x = {`LM32_WORD_WIDTH{1'bx}};
+    endcase
+end
+
+// Generate byte enable according to size of load or store and address being accessed.
+// The lowest byte address (2'b00) selects the most significant lane (bit 3 of
+// the mask) and 2'b11 selects lane 0. The '?' positions are casez wildcards:
+// halfword accesses ignore address bit 0, word accesses ignore both bits.
+always @(*)
+begin
+    casez ({size_x, load_store_address_x[1:0]})
+    {`LM32_SIZE_BYTE, 2'b11}:  byte_enable_x = 4'b0001;
+    {`LM32_SIZE_BYTE, 2'b10}:  byte_enable_x = 4'b0010;
+    {`LM32_SIZE_BYTE, 2'b01}:  byte_enable_x = 4'b0100;
+    {`LM32_SIZE_BYTE, 2'b00}:  byte_enable_x = 4'b1000;
+    {`LM32_SIZE_HWORD, 2'b1?}: byte_enable_x = 4'b0011;
+    {`LM32_SIZE_HWORD, 2'b0?}: byte_enable_x = 4'b1100;
+    {`LM32_SIZE_WORD, 2'b??}:  byte_enable_x = 4'b1111;
+    // Unrecognised size encoding: drive X so it is visible in simulation
+    default:                   byte_enable_x = 4'bxxxx;
+    endcase
+end
+
+`ifdef CFG_DRAM_ENABLED
+// Only replace selected bytes
+assign dram_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : dram_data_m[`LM32_BYTE_0_RNG];
+assign dram_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : dram_data_m[`LM32_BYTE_1_RNG];
+assign dram_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : dram_data_m[`LM32_BYTE_2_RNG];
+assign dram_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : dram_data_m[`LM32_BYTE_3_RNG];
+`endif
+
+`ifdef CFG_IROM_ENABLED
+// Only replace selected bytes
+assign irom_store_data_m[`LM32_BYTE_0_RNG] = byte_enable_m[0] ? store_data_m[`LM32_BYTE_0_RNG] : irom_data_m[`LM32_BYTE_0_RNG];
+assign irom_store_data_m[`LM32_BYTE_1_RNG] = byte_enable_m[1] ? store_data_m[`LM32_BYTE_1_RNG] : irom_data_m[`LM32_BYTE_1_RNG];
+assign irom_store_data_m[`LM32_BYTE_2_RNG] = byte_enable_m[2] ? store_data_m[`LM32_BYTE_2_RNG] : irom_data_m[`LM32_BYTE_2_RNG];
+assign irom_store_data_m[`LM32_BYTE_3_RNG] = byte_enable_m[3] ? store_data_m[`LM32_BYTE_3_RNG] : irom_data_m[`LM32_BYTE_3_RNG];
+`endif
+
+`ifdef CFG_IROM_ENABLED
+   // Instead of implementing a byte-addressable instruction ROM (for store byte instruction),
+   // a load-and-store architecture is used wherein a 32-bit value is loaded, the requisite
+   // byte is replaced, and the whole 32-bit value is written back
+   
+   assign irom_address_xm = ((irom_select_m == `TRUE) && (store_q_m == `TRUE))
+                           ? load_store_address_m
+                           : load_store_address_x;
+   
+   // All store instructions perform a write operation in the M stage
+   assign irom_we_xm =    (irom_select_m == `TRUE)
+                      && (store_q_m == `TRUE);
+   
+   // A single port in instruction ROM is available to load-store unit for doing loads/stores.
+   // Since every store requires a load (in X stage) and then a store (in M stage), we cannot
+   // allow load (or store) instructions sequentially after the store instructions to proceed 
+   // until the store instruction has vacated M stage (i.e., completed the store operation)
+   assign irom_stall_request_x =    (irom_select_x == `TRUE)
+                                && (store_q_x == `TRUE);
+`endif
+   
+`ifdef CFG_DCACHE_ENABLED
+ `ifdef CFG_DRAM_ENABLED
+  `ifdef CFG_IROM_ENABLED
+   // WB + DC + DRAM + IROM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m
+                   : dram_select_m == `TRUE 
+                     ? dram_data_m
+                     : irom_select_m == `TRUE
+                       ? irom_data_m 
+                       : dcache_data_m;
+  `else
+   // WB + DC + DRAM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m
+                   : dram_select_m == `TRUE 
+                     ? dram_data_m
+                     : dcache_data_m;
+  `endif
+ `else
+  `ifdef CFG_IROM_ENABLED
+   // WB + DC + IROM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m
+                   : irom_select_m == `TRUE 
+                     ? irom_data_m
+                     : dcache_data_m;
+  `else
+   // WB + DC
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m 
+                   : dcache_data_m;
+  `endif
+ `endif
+`else
+ `ifdef CFG_DRAM_ENABLED
+  `ifdef CFG_IROM_ENABLED
+   // WB + DRAM + IROM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m 
+                   : dram_select_m == `TRUE
+                     ? dram_data_m
+                     : irom_data_m;
+  `else
+   // WB + DRAM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m 
+                   : dram_data_m;
+  `endif
+ `else
+  `ifdef CFG_IROM_ENABLED
+   // WB + IROM
+   assign data_m = wb_select_m == `TRUE 
+                   ? wb_data_m 
+                   : irom_data_m;
+  `else
+   // WB
+   assign data_m = wb_data_m;
+  `endif
+ `endif
+`endif
+
+// Sub-word selection and sign/zero-extension for loads.
+// The byte or halfword is picked out of data_w using size_w and the two low
+// address bits (address 2'b00 -> bits 31:24, 2'b11 -> bits 7:0). The upper
+// bits are copies of (sign_extend_w & MSB of the selected field): all zero
+// when sign_extend_w is 0, the field's sign bit otherwise.
+always @(*)
+begin
+    casez ({size_w, load_store_address_w[1:0]})
+    {`LM32_SIZE_BYTE, 2'b11}:  load_data_w = {{24{sign_extend_w & data_w[7]}}, data_w[7:0]};
+    {`LM32_SIZE_BYTE, 2'b10}:  load_data_w = {{24{sign_extend_w & data_w[15]}}, data_w[15:8]};
+    {`LM32_SIZE_BYTE, 2'b01}:  load_data_w = {{24{sign_extend_w & data_w[23]}}, data_w[23:16]};
+    {`LM32_SIZE_BYTE, 2'b00}:  load_data_w = {{24{sign_extend_w & data_w[31]}}, data_w[31:24]};
+    {`LM32_SIZE_HWORD, 2'b1?}: load_data_w = {{16{sign_extend_w & data_w[15]}}, data_w[15:0]};
+    {`LM32_SIZE_HWORD, 2'b0?}: load_data_w = {{16{sign_extend_w & data_w[31]}}, data_w[31:16]};
+    {`LM32_SIZE_WORD, 2'b??}:  load_data_w = data_w;
+    // Unrecognised size encoding: drive X so it is visible in simulation
+    default:                   load_data_w = {`LM32_WORD_WIDTH{1'bx}};
+    endcase
+end
+
+// Unused/constant Wishbone signals
+assign d_bte_o = `LM32_BTYPE_LINEAR;
+
+`ifdef CFG_DCACHE_ENABLED                
+// Generate signal to indicate last word in cache line, plus the Wishbone
+// cycle types and start address used for a cache-line refill.
+// Only bytes_per_line values of 4, 8 and 16 are handled; there is no default
+// branch, so any other value leaves these four nets undriven.
+generate 
+    case (bytes_per_line)
+    4:
+    begin
+// One word per line: a single transfer that is both first and last
+assign first_cycle_type = `LM32_CTYPE_END;
+assign next_cycle_type = `LM32_CTYPE_END;
+assign last_word = `TRUE;
+assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:2], 2'b00};
+    end
+    8:
+    begin
+// Two words per line: the transfer following the first one is the final one
+assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
+assign next_cycle_type = `LM32_CTYPE_END;
+// Last word is reached when all word-offset bits of the current address are 1
+assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
+// Start at the beginning of the line: word-offset and byte bits forced to 0
+assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
+    end
+    16:
+    begin
+// Four words per line: the next cycle type becomes END once the top
+// word-offset bit of the current address is set
+assign first_cycle_type = `LM32_CTYPE_INCREMENTING;
+assign next_cycle_type = d_adr_o[addr_offset_msb] == 1'b1 ? `LM32_CTYPE_END : `LM32_CTYPE_INCREMENTING;
+assign last_word = (&d_adr_o[addr_offset_msb:addr_offset_lsb]) == 1'b1;
+assign first_address = {dcache_refill_address[`LM32_WORD_WIDTH-1:addr_offset_msb+1], {addr_offset_width{1'b0}}, 2'b00};
+    end
+    endcase
+endgenerate
+`endif
+
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// Data Wishbone interface
+// Drives the data-bus master signals (d_cyc_o, d_stb_o, d_adr_o, d_dat_o,
+// d_sel_o, d_we_o, d_cti_o, d_lock_o). When no cycle is active it starts, in
+// priority order: a cache refill (if configured), a store from the M stage,
+// or a load from the M stage that targets Wishbone. It also maintains
+// wb_data_m, wb_load_complete and stall_wb_load.
+// Statement order matters: where several nonblocking assignments to the same
+// register execute in one clock, the last one in the block takes effect.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        d_cyc_o <= `FALSE;
+        d_stb_o <= `FALSE;
+        d_dat_o <= {`LM32_WORD_WIDTH{1'b0}};
+        d_adr_o <= {`LM32_WORD_WIDTH{1'b0}};
+        d_sel_o <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
+        d_we_o <= `FALSE;
+        d_cti_o <= `LM32_CTYPE_END;
+        d_lock_o <= `FALSE;
+        wb_data_m <= {`LM32_WORD_WIDTH{1'b0}};
+        wb_load_complete <= `FALSE;
+        stall_wb_load <= `FALSE;
+`ifdef CFG_DCACHE_ENABLED                
+        dcache_refill_ready <= `FALSE;
+`endif                
+    end
+    else
+    begin
+`ifdef CFG_DCACHE_ENABLED 
+        // Refill ready should only be asserted for a single cycle               
+        dcache_refill_ready <= `FALSE;
+`endif                
+        // Is a Wishbone cycle already in progress?
+        if (d_cyc_o == `TRUE)
+        begin
+            // Is the cycle complete?
+            // An error response ends the transfer exactly like an acknowledge;
+            // d_rty_i is not examined anywhere in this block.
+            if ((d_ack_i == `TRUE) || (d_err_i == `TRUE))
+            begin
+`ifdef CFG_DCACHE_ENABLED                
+                if ((dcache_refilling == `TRUE) && (!last_word))
+                begin
+                    // Fetch next word of cache line    
+                    d_adr_o[addr_offset_msb:addr_offset_lsb] <= d_adr_o[addr_offset_msb:addr_offset_lsb] + 1'b1;
+                end
+                else
+`endif                
+                begin
+                    // Refill/access complete
+                    d_cyc_o <= `FALSE;
+                    d_stb_o <= `FALSE;
+                    d_lock_o <= `FALSE;
+                end
+`ifdef CFG_DCACHE_ENABLED    
+                // These two updates run on every completed transfer, not only during refills
+                d_cti_o <= next_cycle_type;
+                // If we are performing a refill, indicate to cache next word of data is ready            
+                dcache_refill_ready <= dcache_refilling;
+`endif
+                // Register data read from Wishbone interface
+                wb_data_m <= d_dat_i;
+                // Don't set when stores complete - otherwise we'll deadlock if load in m stage
+                wb_load_complete <= !d_we_o;
+            end
+            // synthesis translate_off            
+            if (d_err_i == `TRUE)
+                $display ("Data bus error. Address: %x", d_adr_o);
+            // synthesis translate_on
+        end
+        else
+        begin
+`ifdef CFG_DCACHE_ENABLED                
+            if (dcache_refill_request == `TRUE)
+            begin
+                // Start cache refill
+                d_adr_o <= first_address;
+                d_cyc_o <= `TRUE;
+                d_sel_o <= {`LM32_WORD_WIDTH/8{`TRUE}};
+                d_stb_o <= `TRUE;                
+                d_we_o <= `FALSE;
+                d_cti_o <= first_cycle_type;
+                //d_lock_o <= `TRUE;
+            end
+            else 
+`endif            
+                 // Stores to the data RAM / instruction ROM (when configured) stay off the bus
+                 if (   (store_q_m == `TRUE)
+                     && (stall_m == `FALSE)
+`ifdef CFG_DRAM_ENABLED
+                     && (dram_select_m == `FALSE)
+`endif
+`ifdef CFG_IROM_ENABLED
+                    && (irom_select_m == `FALSE)
+`endif                 
+                    )
+            begin
+                // Data cache is write through, so all stores go to memory
+                d_dat_o <= store_data_m;
+                d_adr_o <= load_store_address_m;
+                d_cyc_o <= `TRUE;
+                d_sel_o <= byte_enable_m;
+                d_stb_o <= `TRUE;
+                d_we_o <= `TRUE;
+                d_cti_o <= `LM32_CTYPE_END;
+            end        
+            else if (   (load_q_m == `TRUE) 
+                     && (wb_select_m == `TRUE) 
+                     && (wb_load_complete == `FALSE)
+                     // stall_m will be TRUE, because stall_wb_load will be TRUE 
+                    )
+            begin
+                // Read requested address
+                stall_wb_load <= `FALSE;
+                d_adr_o <= load_store_address_m;
+                d_cyc_o <= `TRUE;
+                d_sel_o <= byte_enable_m;
+                d_stb_o <= `TRUE;
+                d_we_o <= `FALSE;
+                d_cti_o <= `LM32_CTYPE_END;
+            end
+        end
+        // The three updates below come after the if/else above, so they
+        // override any assignment to the same register made earlier this clock.
+        // Clear load/store complete flag when instruction leaves M stage
+        if (stall_m == `FALSE)
+            wb_load_complete <= `FALSE;
+        // When a Wishbone load first enters the M stage, we need to stall it
+        if ((load_q_x == `TRUE) && (wb_select_x == `TRUE) && (stall_x == `FALSE))
+            stall_wb_load <= `TRUE;
+        // Clear stall request if load instruction is killed
+        if ((kill_m == `TRUE) || (exception_m == `TRUE))
+            stall_wb_load <= `FALSE;
+    end
+end
+
+// Pipeline registers  
+
+// X/M stage pipeline registers
+// Carry the load/store attributes computed in the X stage (extension mode,
+// access size, byte enables, lane-aligned store data and the target-select
+// flags) into the M stage. The registers hold their value while stall_m is
+// asserted.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        sign_extend_m <= `FALSE;
+        size_m <= 2'b00;
+        // Explicit-width reset value (same form as the d_sel_o reset) instead
+        // of relying on implicit zero-extension of a 1-bit constant
+        byte_enable_m <= {`LM32_BYTE_SELECT_WIDTH{`FALSE}};
+        store_data_m <= {`LM32_WORD_WIDTH{1'b0}};
+`ifdef CFG_DCACHE_ENABLED
+        dcache_select_m <= `FALSE;
+`endif
+`ifdef CFG_DRAM_ENABLED
+        dram_select_m <= `FALSE;
+`endif
+`ifdef CFG_IROM_ENABLED
+        irom_select_m <= `FALSE;
+`endif
+        wb_select_m <= `FALSE;        
+    end
+    else
+    begin
+        // Advance only when the M stage is free to accept a new instruction
+        if (stall_m == `FALSE)
+        begin
+            sign_extend_m <= sign_extend_x;
+            size_m <= size_x;
+            byte_enable_m <= byte_enable_x;    
+            store_data_m <= store_data_x;
+`ifdef CFG_DCACHE_ENABLED
+            dcache_select_m <= dcache_select_x;
+`endif
+`ifdef CFG_DRAM_ENABLED
+            dram_select_m <= dram_select_x;
+`endif
+`ifdef CFG_IROM_ENABLED
+            irom_select_m <= irom_select_x;
+`endif
+            wb_select_m <= wb_select_x;
+        end
+    end
+end
+
+// M/W stage pipeline registers
+// Capture the access size, the selected load data (data_m) and the extension
+// mode for use by the W-stage sub-word extraction. These registers update on
+// every clock; there is no stall qualifier in this block.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        size_w <= 2'b00;
+        data_w <= {`LM32_WORD_WIDTH{1'b0}};
+        sign_extend_w <= `FALSE;
+    end
+    else
+    begin
+        size_w <= size_m;
+        data_w <= data_m;
+        sign_extend_w <= sign_extend_m;
+    end
+end
+
+/////////////////////////////////////////////////////
+// Behavioural Logic
+/////////////////////////////////////////////////////
+
+// synthesis translate_off
+
+// Check for non-aligned loads or stores
+// Simulation-only monitor (enclosed in synthesis translate_off/on). It prints
+// a warning when a load or store leaves the M stage with an address that is
+// not aligned to its size. The case-inequality operators (!==) also flag
+// address bits that are X or Z.
+always @(posedge clk_i)
+begin
+    if (((load_q_m == `TRUE) || (store_q_m == `TRUE)) && (stall_m == `FALSE)) 
+    begin
+        if ((size_m === `LM32_SIZE_HWORD) && (load_store_address_m[0] !== 1'b0))
+            $display ("Warning: Non-aligned halfword access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
+        if ((size_m === `LM32_SIZE_WORD) && (load_store_address_m[1:0] !== 2'b00))
+            $display ("Warning: Non-aligned word access. Address: 0x%0x Time: %0t.", load_store_address_m, $time);
+    end
+end
+
+// synthesis translate_on
+
+endmodule
diff --git a/verilog/lm32/lm32_logic_op.v b/verilog/lm32/lm32_logic_op.v
new file mode 100644 (file)
index 0000000..7b1a20d
--- /dev/null
@@ -0,0 +1,97 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_logic_op.v
+// Title            : Logic operations (and / or / not etc)
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_logic_op (
+    // ----- Inputs -------
+    input      [`LM32_LOGIC_OP_RNG] logic_op_x,     // Truth table of the operation, one entry per operand-bit pair
+    input      [`LM32_WORD_RNG]     operand_0_x,    // Supplies the low bit of each table index
+    input      [`LM32_WORD_RNG]     operand_1_x,    // Supplies the high bit of each table index
+    // ----- Outputs -------
+    output reg [`LM32_WORD_RNG]     logic_result_x  // Bitwise result of the selected operation
+    );
+
+/////////////////////////////////////////////////////
+// Theory of operation
+/////////////////////////////////////////////////////
+
+// Each result bit is computed independently of the others.
+// For bit position i the two operand bits are concatenated as
+// {operand_1_x[i], operand_0_x[i]} and that 2-bit value selects a
+// single bit of logic_op_x, so logic_op_x serves as the truth table
+// of the operation being performed.
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+integer bit_idx;        // Bit position currently being evaluated
+reg [1:0] table_idx;    // {operand_1 bit, operand_0 bit} for that position
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+always @(*)
+begin
+    for (bit_idx = 0; bit_idx < `LM32_WORD_WIDTH; bit_idx = bit_idx + 1)
+    begin
+        // Form the table index from the operand bits, then look it up
+        table_idx = {operand_1_x[bit_idx], operand_0_x[bit_idx]};
+        logic_result_x[bit_idx] = logic_op_x[table_idx];
+    end
+end
+
+endmodule
+
diff --git a/verilog/lm32/lm32_mc_arithmetic.v b/verilog/lm32/lm32_mc_arithmetic.v
new file mode 100644 (file)
index 0000000..d476d55
--- /dev/null
@@ -0,0 +1,309 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_mc_arithmetic.v
+// Title            : Multi-cycle arithmetic unit.
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+           
+`define LM32_MC_STATE_RNG         2:0
+`define LM32_MC_STATE_IDLE        3'b000
+`define LM32_MC_STATE_MULTIPLY    3'b001
+`define LM32_MC_STATE_MODULUS     3'b010   
+`define LM32_MC_STATE_DIVIDE      3'b011 
+`define LM32_MC_STATE_SHIFT_LEFT  3'b100
+`define LM32_MC_STATE_SHIFT_RIGHT 3'b101
+
+/////////////////////////////////////////////////////
+// Module interface - iterative divide / modulus / multiply / shift unit that asserts stall_request_x while an operation is in progress
+/////////////////////////////////////////////////////
+
+module lm32_mc_arithmetic (
+    // ----- Inputs -----
+    clk_i,
+    rst_i,
+    stall_d,
+    kill_x,
+`ifdef CFG_MC_DIVIDE_ENABLED
+    divide_d,
+    modulus_d,
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+    multiply_d,
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+    shift_left_d,
+    shift_right_d,
+    sign_extend_d,
+`endif
+    operand_0_d,
+    operand_1_d,
+    // ----- Outputs -----
+    result_x,
+`ifdef CFG_MC_DIVIDE_ENABLED
+    divide_by_zero_x,
+`endif
+    stall_request_x
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                    // Clock
+input rst_i;                                    // Reset
+input stall_d;                                  // Stall instruction in D stage
+input kill_x;                                   // Kill instruction in X stage
+`ifdef CFG_MC_DIVIDE_ENABLED
+input divide_d;                                 // Perform divide
+input modulus_d;                                // Perform modulus
+`endif
+`ifdef CFG_MC_MULTIPLY_ENABLED
+input multiply_d;                               // Perform multiply
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+input shift_left_d;                             // Perform left shift
+input shift_right_d;                            // Perform right shift
+input sign_extend_d;                            // Whether to sign-extend (arithmetic) or zero-extend (logical)
+`endif
+input [`LM32_WORD_RNG] operand_0_d;             // First operand: dividend, multiplicand, or value to shift
+input [`LM32_WORD_RNG] operand_1_d;             // Second operand: divisor, multiplier, or shift amount (bits 4:0 only)
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+output [`LM32_WORD_RNG] result_x;               // Result of operation
+reg    [`LM32_WORD_RNG] result_x;
+`ifdef CFG_MC_DIVIDE_ENABLED
+output divide_by_zero_x;                        // A divide by zero was attempted
+reg    divide_by_zero_x;
+`endif
+output stall_request_x;                         // Request to stall pipeline from X stage back
+wire   stall_request_x;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+reg [`LM32_WORD_RNG] p;                         // Partial remainder (divide/modulus) or accumulated product (multiply)
+reg [`LM32_WORD_RNG] a;                         // Dividend turning into quotient, shifted multiplicand, or value being shifted left
+reg [`LM32_WORD_RNG] b;                         // Divisor, remaining multiplier bits, or value being shifted right
+`ifdef CFG_MC_DIVIDE_ENABLED
+wire [32:0] t;                                  // Trial subtraction result; t[32] clear means the divisor fitted
+`endif
+
+reg [`LM32_MC_STATE_RNG] state;                 // Current state of FSM
+reg [5:0] cycles;                               // Number of cycles remaining in the operation
+
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+reg sign_extend_x;                              // Whether to sign extend or zero extend right shifts
+wire fill_value;                                // Value to fill with for right barrel-shifts
+`endif
+
+/////////////////////////////////////////////////////
+// Combinational logic
+/////////////////////////////////////////////////////
+
+// Stall pipeline while any operation is being performed
+assign stall_request_x = state != `LM32_MC_STATE_IDLE;
+
+`ifdef CFG_MC_DIVIDE_ENABLED
+// Subtraction: shift the next dividend bit into the partial remainder and try to subtract the divisor
+assign t = {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]} - b;
+`endif
+
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+// Determine fill value for right shift - Sign bit for arithmetic shift, or zero for logical shift
+assign fill_value = (sign_extend_x == `TRUE) & b[`LM32_WORD_WIDTH-1];
+`endif
+
+/////////////////////////////////////////////////////
+// Sequential logic
+/////////////////////////////////////////////////////
+
+// Operation state machine: a request is latched in IDLE, then one step of the selected operation is performed per clock
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        cycles <= {6{1'b0}};
+        p <= {`LM32_WORD_WIDTH{1'b0}};
+        a <= {`LM32_WORD_WIDTH{1'b0}};
+        b <= {`LM32_WORD_WIDTH{1'b0}};
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+        sign_extend_x <= 1'b0;
+`endif
+`ifdef CFG_MC_DIVIDE_ENABLED
+        divide_by_zero_x <= `FALSE;
+`endif
+        result_x <= {`LM32_WORD_WIDTH{1'b0}};
+        state <= `LM32_MC_STATE_IDLE;
+    end
+    else
+    begin
+`ifdef CFG_MC_DIVIDE_ENABLED
+        divide_by_zero_x <= `FALSE;             // Default; overridden below on the final divide/modulus step
+`endif
+        case (state)
+        `LM32_MC_STATE_IDLE:
+        begin
+            if (stall_d == `FALSE)                 // Operands and request are only sampled when the D stage is not stalled
+            begin          
+                cycles <= `LM32_WORD_WIDTH;     // Defaults for multiply/divide/modulus; the shift requests below override cycles and b
+                p <= 32'b0;
+                a <= operand_0_d;
+                b <= operand_1_d;                    
+`ifdef CFG_MC_DIVIDE_ENABLED
+                if (divide_d == `TRUE)
+                    state <= `LM32_MC_STATE_DIVIDE;
+                if (modulus_d == `TRUE)
+                    state <= `LM32_MC_STATE_MODULUS;
+`endif                    
+`ifdef CFG_MC_MULTIPLY_ENABLED
+                if (multiply_d == `TRUE)
+                    state <= `LM32_MC_STATE_MULTIPLY;
+`endif
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+                if (shift_left_d == `TRUE)
+                begin
+                    state <= `LM32_MC_STATE_SHIFT_LEFT;
+                    sign_extend_x <= sign_extend_d;
+                    cycles <= operand_1_d[4:0]; // Shift amount
+                    a <= operand_0_d;
+                    b <= operand_0_d;
+                end
+                if (shift_right_d == `TRUE)
+                begin
+                    state <= `LM32_MC_STATE_SHIFT_RIGHT;
+                    sign_extend_x <= sign_extend_d;
+                    cycles <= operand_1_d[4:0]; // Shift amount
+                    a <= operand_0_d;
+                    b <= operand_0_d;
+                end
+`endif
+            end            
+        end
+`ifdef CFG_MC_DIVIDE_ENABLED
+        `LM32_MC_STATE_DIVIDE:
+        begin
+            if (t[32] == 1'b0)                  // Divisor fitted: keep the difference and shift a 1 into the quotient
+            begin
+                p <= t[31:0];
+                a <= {a[`LM32_WORD_WIDTH-2:0], 1'b1};
+            end
+            else                                // Divisor did not fit: keep the shifted remainder and shift a 0 into the quotient
+            begin
+                p <= {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]};
+                a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0};
+            end
+            result_x <= a;                      // Quotient bits accumulated so far
+            if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE))
+            begin
+                // Check for divide by zero
+                divide_by_zero_x <= b == {`LM32_WORD_WIDTH{1'b0}};
+                state <= `LM32_MC_STATE_IDLE;
+            end
+            cycles <= cycles - 1'b1;
+        end
+        `LM32_MC_STATE_MODULUS:
+        begin
+            if (t[32] == 1'b0)                  // Same iteration as DIVIDE; only the value copied to result_x differs
+            begin
+                p <= t[31:0];
+                a <= {a[`LM32_WORD_WIDTH-2:0], 1'b1};
+            end
+            else 
+            begin
+                p <= {p[`LM32_WORD_WIDTH-2:0], a[`LM32_WORD_WIDTH-1]};
+                a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0};
+            end
+            result_x <= p;                      // Remainder so far
+            if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE))
+            begin
+                // Check for divide by zero
+                divide_by_zero_x <= b == {`LM32_WORD_WIDTH{1'b0}};
+                state <= `LM32_MC_STATE_IDLE;
+            end
+            cycles <= cycles - 1'b1;
+        end
+`endif        
+`ifdef CFG_MC_MULTIPLY_ENABLED
+        `LM32_MC_STATE_MULTIPLY:
+        begin
+            if (b[0] == 1'b1)                   // Shift-and-add: add the shifted multiplicand when the current multiplier bit is set
+                p <= p + a;
+            b <= {1'b0, b[`LM32_WORD_WIDTH-1:1]};
+            a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0};
+            result_x <= p;
+            if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE))
+                state <= `LM32_MC_STATE_IDLE;
+            cycles <= cycles - 1'b1;
+        end
+`endif     
+`ifdef CFG_MC_BARREL_SHIFT_ENABLED
+        `LM32_MC_STATE_SHIFT_LEFT:
+        begin       
+            a <= {a[`LM32_WORD_WIDTH-2:0], 1'b0};   // Shift left by one bit per clock, filling with zero
+            result_x <= a;
+            if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE))
+                state <= `LM32_MC_STATE_IDLE;
+            cycles <= cycles - 1'b1;
+        end
+        `LM32_MC_STATE_SHIFT_RIGHT:
+        begin       
+            b <= {fill_value, b[`LM32_WORD_WIDTH-1:1]};   // Shift right by one bit per clock, filling with fill_value
+            result_x <= b;
+            if ((cycles == `LM32_WORD_WIDTH'd0) || (kill_x == `TRUE))
+                state <= `LM32_MC_STATE_IDLE;
+            cycles <= cycles - 1'b1;
+        end
+`endif   
+        endcase
+    end
+end 
+
+endmodule
diff --git a/verilog/lm32/lm32_multiplier.v b/verilog/lm32/lm32_multiplier.v
new file mode 100644 (file)
index 0000000..d68d150
--- /dev/null
@@ -0,0 +1,120 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_multiplier.v
+// Title            : Pipelined multiplier.
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+                  
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_multiplier (
+    // ----- Inputs -----
+    input                       clk_i,      // Clock
+    input                       rst_i,      // Reset
+    input                       stall_x,    // Stall instruction in X stage
+    input                       stall_m,    // Stall instruction in M stage
+    input      [`LM32_WORD_RNG] operand_0,  // Multiplicand
+    input      [`LM32_WORD_RNG] operand_1,  // Multiplier
+    // ----- Outputs -----
+    output reg [`LM32_WORD_RNG] result      // Product of multiplication (same width as the operands)
+    );
+
+/////////////////////////////////////////////////////
+// Overview
+/////////////////////////////////////////////////////
+
+// Three register stages: operand capture (frozen by stall_x), product
+// (frozen by stall_m) and the output register (loaded on every clock).
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+reg [`LM32_WORD_RNG] multiplicand_q;    // Registered copy of operand_0
+reg [`LM32_WORD_RNG] multiplier_q;      // Registered copy of operand_1
+reg [`LM32_WORD_RNG] product_q;         // Registered product of the two captured operands
+
+/////////////////////////////////////////////////////
+// Sequential logic
+/////////////////////////////////////////////////////
+
+// Stage 1: capture the operands unless the X stage is stalled
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        multiplicand_q <= {`LM32_WORD_WIDTH{1'b0}};
+        multiplier_q <= {`LM32_WORD_WIDTH{1'b0}};
+    end
+    else if (stall_x == `FALSE)
+    begin
+        multiplicand_q <= operand_0;
+        multiplier_q <= operand_1;
+    end
+end
+
+// Stage 2: multiply the captured operands unless the M stage is stalled
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        product_q <= {`LM32_WORD_WIDTH{1'b0}};
+    else if (stall_m == `FALSE)
+        product_q <= multiplicand_q * multiplier_q;
+end
+
+// Stage 3: output register, loaded on every clock regardless of stalls
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+        result <= {`LM32_WORD_WIDTH{1'b0}};
+    else
+        result <= product_q;
+end
+
+endmodule
diff --git a/verilog/lm32/lm32_multiplier_spartan6.v b/verilog/lm32/lm32_multiplier_spartan6.v
new file mode 100644 (file)
index 0000000..eb25754
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Milkymist SoC
+ * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+module lm32_multiplier(
+       input clk_i,
+       input rst_i,
+       input stall_x,
+       input stall_m,
+       input [31:0] operand_0,
+       input [31:0] operand_1,
+       output [31:0] result
+);
+// Low 32 bits of operand_0 * operand_1, computed with three DSP48A1 slices (D1, D2, D3).
+// See UG389, esp. p. 29 "Fully Pipelined, 35 x 35 Multiplier Use Model (Large Multiplier)"
+// Each operand is split into a low 17-bit part (al, bl) and a high 15-bit part (au, bu), zero-extended to 18 bits.
+wire [17:0] au = {3'd0, operand_0[31:17]};
+wire [17:0] al = {1'b0, operand_0[16:0]};
+wire [17:0] bu = {3'd0, operand_1[31:17]};
+wire [17:0] bl = {1'b0, operand_1[16:0]};
+
+wire [17:0] bl_forward; // bl as forwarded by D1's BCOUT, used as the B operand of D2
+wire [35:0] al_bl;      // D1's M output (al * bl)
+
+reg [16:0] result_low;  // Low 17 result bits: al_bl[16:0] delayed by one clock
+always @(posedge clk_i) begin
+       if(rst_i)
+               result_low <= 17'd0;
+       else
+               result_low <= al_bl[16:0];
+end
+assign result[16:0] = result_low;
+
+DSP48A1 #(
+       .A0REG(1),
+       .A1REG(0),
+       .B0REG(1),
+       .B1REG(0),
+       .CARRYINREG(0),
+       .CARRYINSEL("OPMODE5"),
+       .CARRYOUTREG(0),
+       .CREG(0),
+       .DREG(0),
+       .MREG(1),
+       .OPMODEREG(0),
+       .PREG(0),
+       .RSTTYPE("SYNC")
+) D1 (
+       .BCOUT(bl_forward),
+       .PCOUT(),
+       .CARRYOUT(),
+       .CARRYOUTF(),
+       .M(al_bl),
+       .P(),
+       .PCIN(),
+       .CLK(clk_i),
+       .OPMODE(8'd1), // per the UG389 OPMODE encoding: X mux = multiplier product, Z mux = zero
+       .A(al),
+       .B(bl),
+       .C(),
+       .CARRYIN(),
+       .D(),
+       .CEA(~stall_x), // Operand registers are frozen while the X stage is stalled
+       .CEB(~stall_x),
+       .CEC(),
+       .CECARRYIN(),
+       .CED(),
+       .CEM(~stall_m), // Multiplier register is frozen while the M stage is stalled
+       .CEOPMODE(),
+       .CEP(1'b1),
+       .RSTA(rst_i),
+       .RSTB(rst_i),
+       .RSTC(),
+       .RSTCARRYIN(),
+       .RSTD(),
+       .RSTM(rst_i),
+       .RSTOPMODE(),
+       .RSTP()
+);
+
+wire [47:0] au_bl_sum; // D2's PCOUT: au * bl + (al * bl >> 17), cascaded into D3
+
+DSP48A1 #(
+       .A0REG(1),
+       .A1REG(0),
+       .B0REG(0),
+       .B1REG(0),
+       .CARRYINREG(0),
+       .CARRYINSEL("OPMODE5"),
+       .CARRYOUTREG(0),
+       .CREG(0),
+       .DREG(0),
+       .MREG(1),
+       .OPMODEREG(0),
+       .PREG(0),
+       .RSTTYPE("SYNC")
+) D2 (
+       .BCOUT(),
+       .PCOUT(au_bl_sum),
+       .CARRYOUT(),
+       .CARRYOUTF(),
+       .M(),
+       .P(),
+       .PCIN(),
+       .CLK(clk_i),
+       .OPMODE(8'd13), // per the UG389 OPMODE encoding: X mux = multiplier product, Z mux = C port
+       .A(au),
+       .B(bl_forward),
+       .C({31'd0, al_bl[33:17]}), // Upper part of al * bl, aligned with the cross products
+       .CARRYIN(),
+       .D(),
+       .CEA(~stall_x),
+       .CEB(),
+       .CEC(),
+       .CECARRYIN(),
+       .CED(),
+       .CEM(~stall_m),
+       .CEOPMODE(),
+       .CEP(),
+       .RSTA(rst_i),
+       .RSTB(),
+       .RSTC(),
+       .RSTCARRYIN(),
+       .RSTD(),
+       .RSTM(rst_i),
+       .RSTOPMODE(),
+       .RSTP()
+);
+
+wire [47:0] r_full;                  // D3's registered P output: bu * al + au_bl_sum
+assign result[31:17] = r_full[14:0]; // result[31:17] is 15 bits wide, so select exactly 15 bits (the low ones) of the sum
+
+DSP48A1 #(
+       .A0REG(1),
+       .A1REG(0),
+       .B0REG(1),
+       .B1REG(0),
+       .CARRYINREG(0),
+       .CARRYINSEL("OPMODE5"),
+       .CARRYOUTREG(0),
+       .CREG(0),
+       .DREG(0),
+       .MREG(1),
+       .OPMODEREG(0),
+       .PREG(1),
+       .RSTTYPE("SYNC")
+) D3 (
+       .BCOUT(),
+       .PCOUT(),
+       .CARRYOUT(),
+       .CARRYOUTF(),
+       .M(),
+       .P(r_full),
+       .PCIN(au_bl_sum),
+       .CLK(clk_i),
+       .OPMODE(8'd5), // per the UG389 OPMODE encoding: X mux = multiplier product, Z mux = PCIN
+       .A(bu),
+       .B(al),
+       .C(),
+       .CARRYIN(),
+       .D(),
+       .CEA(~stall_x),
+       .CEB(~stall_x),
+       .CEC(),
+       .CECARRYIN(),
+       .CED(),
+       .CEM(~stall_m),
+       .CEOPMODE(),
+       .CEP(1'b1), // Output register loads on every clock, like result_low
+       .RSTA(rst_i),
+       .RSTB(rst_i),
+       .RSTC(),
+       .RSTCARRYIN(),
+       .RSTD(),
+       .RSTM(rst_i),
+       .RSTOPMODE(),
+       .RSTP(rst_i)
+);
+
+endmodule
diff --git a/verilog/lm32/lm32_ram.v b/verilog/lm32/lm32_ram.v
new file mode 100644 (file)
index 0000000..d84352f
--- /dev/null
@@ -0,0 +1,128 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_ram.v
+// Title            : Pseudo dual-port RAM.
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : Options added to select EBRs (True-DP, Pseudo-DP, DQ, or
+//                  : Distributed RAM).
+// Version          : 3.2
+//                  : EBRs use SYNC resets instead of ASYNC resets.
+// Version          : 3.5
+//                  : Added read-after-write hazard resolution when using true
+//                  : dual-port EBRs
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_ram
+  #(
+   // ----- Parameters -------
+   parameter data_width = 1,                   // Width of the data ports
+   parameter address_width = 1                 // Width of the address ports
+   )
+  (
+   // ----- Inputs -------
+   input                      read_clk,        // Read clock
+   input                      write_clk,       // Write clock
+   input                      reset,           // Reset (not referenced inside this module)
+
+   input                      enable_read,     // Read port enable
+   input  [address_width-1:0] read_address,    // Read address
+
+   input                      enable_write,    // Write port enable
+   input  [address_width-1:0] write_address,   // Write address
+   input  [data_width-1:0]    write_data,      // Data to write to write_address
+   input                      write_enable,    // Write strobe
+   // ----- Outputs -------
+   output [data_width-1:0]    read_data        // Contents of the location selected by the registered read address
+   );
+
+/*----------------------------------------------------------------------
+ Overview
+ ----------------------------------------------------------------------*/
+// Memory with one write port and one read port, each clocked separately.
+//
+// * Write: on a rising write_clk edge, write_data is stored at
+//   write_address when enable_write and write_enable are both set.
+// * Read: on a rising read_clk edge with enable_read set, read_address
+//   is captured; read_data continuously reflects the array entry at the
+//   captured address.
+// * No bypass logic is present for a read of a location being written.
+
+/*----------------------------------------------------------------------
+ Internal nets and registers
+ ----------------------------------------------------------------------*/
+reg [data_width-1:0]    mem[0:(1<<address_width)-1]; // The RAM
+reg [address_width-1:0] read_address_q;              // Read address captured on read_clk
+
+/*----------------------------------------------------------------------
+ Sequential Logic
+ ----------------------------------------------------------------------*/
+// Write port: a location is updated only when both the port enable
+// and the write strobe are asserted
+always @(posedge write_clk)
+begin
+  if ((enable_write == `TRUE) && (write_enable == `TRUE))
+    mem[write_address] <= write_data;
+end
+
+// Read address register: loaded when enable_read is set, otherwise it
+// keeps the previously captured address
+always @(posedge read_clk)
+begin
+  if (enable_read)
+    read_address_q <= read_address;
+end
+
+/*----------------------------------------------------------------------
+ Combinational Logic
+ ----------------------------------------------------------------------*/
+// Read port: the array is indexed by the captured address, so the data
+// corresponds to the address presented on the previous enabled read clock
+assign read_data = mem[read_address_q];
+
+endmodule
diff --git a/verilog/lm32/lm32_shifter.v b/verilog/lm32/lm32_shifter.v
new file mode 100644 (file)
index 0000000..ce5b85d
--- /dev/null
@@ -0,0 +1,155 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_shifter.v
+// Title            : Barrel shifter
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : Initial Release
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_shifter (
+    // ----- Inputs -------
+    input                   clk_i,              // Clock
+    input                   rst_i,              // Reset
+    input                   stall_x,            // Stall instruction in X stage
+    input                   direction_x,        // Direction to shift
+    input                   sign_extend_x,      // Whether shift is arithmetic (1'b1) or logical (1'b0)
+    input  [`LM32_WORD_RNG] operand_0_x,        // Operand to shift
+    input  [`LM32_WORD_RNG] operand_1_x,        // Operand that specifies how many bits to shift by
+    // ----- Outputs -------
+    output [`LM32_WORD_RNG] shifter_result_m    // Result of shift
+    );
+
+/////////////////////////////////////////////////////
+// Overview
+/////////////////////////////////////////////////////
+
+// Only a right shifter is implemented. A left shift is obtained by
+// reversing the bit order of the operand, shifting right, and reversing
+// the bit order of the result again.
+//
+// The shift itself is registered: operands are consumed in the X stage
+// and the result, together with the direction used to pick between the
+// plain and the bit-reversed result, is available one clock later.
+// While stall_x is asserted the registered result and direction keep
+// their previous values.
+
+/////////////////////////////////////////////////////
+// Internal nets and registers 
+/////////////////////////////////////////////////////
+
+reg direction_m;                                // Direction that belongs to the registered result
+reg  [`LM32_WORD_RNG] right_shift_result;       // Registered output of the right shifter
+reg  [`LM32_WORD_RNG] operand_reversed;         // operand_0_x with its bit order reversed
+reg  [`LM32_WORD_RNG] result_reversed;          // right_shift_result with its bit order reversed
+wire [`LM32_WORD_RNG] right_shift_operand;      // Value actually fed to the right shifter
+wire [`LM32_WORD_RNG] right_shift_in;           // Bits shifted in from above the operand
+wire fill_value;                                // Single fill bit replicated into right_shift_in
+
+integer in_idx;                                 // Loop index for the operand reversal
+integer out_idx;                                // Loop index for the result reversal
+
+/////////////////////////////////////////////////////
+// Combinational Logic
+/////////////////////////////////////////////////////
+
+// Bit-reverse the operand so that a left shift can be carried out by the
+// right shifter
+always @(*)
+begin
+    for (in_idx = 0; in_idx < `LM32_WORD_WIDTH; in_idx = in_idx + 1)
+        operand_reversed[in_idx] = operand_0_x[`LM32_WORD_WIDTH-1-in_idx];
+end
+
+// Select operand - reversed for a left shift, unmodified otherwise
+assign right_shift_operand = (direction_x == `LM32_SHIFT_OP_LEFT) ? operand_reversed : operand_0_x;
+
+// Determine fill value for right shift - Sign bit for an arithmetic right
+// shift, zero for a logical shift and for every left shift
+assign fill_value = ((sign_extend_x == `TRUE) && (direction_x == `LM32_SHIFT_OP_RIGHT))
+                      ? operand_0_x[`LM32_WORD_WIDTH-1]
+                      : 1'b0;
+
+// Replicate the fill bit across a whole word to be shifted in
+assign right_shift_in = {`LM32_WORD_WIDTH{fill_value}};
+
+// Bit-reverse the registered right-shift result; this is the result of a
+// left shift
+always @(*)
+begin
+    for (out_idx = 0; out_idx < `LM32_WORD_WIDTH; out_idx = out_idx + 1)
+        result_reversed[out_idx] = right_shift_result[`LM32_WORD_WIDTH-1-out_idx];
+end
+
+// Select result according to the registered direction
+assign shifter_result_m = (direction_m == `LM32_SHIFT_OP_LEFT) ? result_reversed : right_shift_result;
+
+/////////////////////////////////////////////////////
+// Sequential Logic
+/////////////////////////////////////////////////////
+
+// Perform the right shift and register it together with the direction.
+// The fill word is placed above the operand so that shifting the
+// concatenation right brings fill bits into the upper end of the word
+// that is kept.
+always @(posedge clk_i `CFG_RESET_SENSITIVITY)
+begin
+    if (rst_i == `TRUE)
+    begin
+        direction_m <= `FALSE;
+        right_shift_result <= {`LM32_WORD_WIDTH{1'b0}};
+    end
+    else if (stall_x == `FALSE)
+    begin
+        direction_m <= direction_x;
+        right_shift_result <= {right_shift_in, right_shift_operand} >> operand_1_x[`LM32_SHIFT_RNG];
+    end
+end
+
+endmodule
diff --git a/verilog/lm32/lm32_top.v b/verilog/lm32/lm32_top.v
new file mode 100644 (file)
index 0000000..c03e280
--- /dev/null
@@ -0,0 +1,354 @@
+//   ==================================================================
+//   >>>>>>>>>>>>>>>>>>>>>>> COPYRIGHT NOTICE <<<<<<<<<<<<<<<<<<<<<<<<<
+//   ------------------------------------------------------------------
+//   Copyright (c) 2006-2011 by Lattice Semiconductor Corporation
+//   ALL RIGHTS RESERVED 
+//   ------------------------------------------------------------------
+//
+//   IMPORTANT: THIS FILE IS AUTO-GENERATED BY THE LATTICEMICO SYSTEM.
+//
+//   Permission:
+//
+//      Lattice Semiconductor grants permission to use this code
+//      pursuant to the terms of the Lattice Semiconductor Corporation
+//      Open Source License Agreement.  
+//
+//   Disclaimer:
+//
+//      Lattice Semiconductor provides no warranty regarding the use or
+//      functionality of this code. It is the user's responsibility to
+//      verify the user's design for consistency and functionality through
+//      the use of formal verification methods.
+//
+//   --------------------------------------------------------------------
+//
+//                  Lattice Semiconductor Corporation
+//                  5555 NE Moore Court
+//                  Hillsboro, OR 97214
+//                  U.S.A
+//
+//                  TEL: 1-800-Lattice (USA and Canada)
+//                         503-286-8001 (other locations)
+//
+//                  web: http://www.latticesemi.com/
+//                  email: techsupport@latticesemi.com
+//
+//   --------------------------------------------------------------------
+//                         FILE DETAILS
+// Project          : LatticeMico32
+// File             : lm32_top.v
+// Title            : Top-level of CPU.
+// Dependencies     : lm32_include.v
+// Version          : 6.1.17
+//                  : removed SPI - 04/12/07
+// Version          : 7.0SP2, 3.0
+//                  : No Change
+// Version          : 3.1
+//                  : No Change
+// =============================================================================
+
+`include "lm32_include.v"
+
+/////////////////////////////////////////////////////
+// Module interface
+/////////////////////////////////////////////////////
+
+module lm32_top (                              // Wraps lm32_cpu and, under CFG_JTAG_ENABLED, jtag_cores
+    // ----- Inputs -------
+    clk_i,
+    rst_i,
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+    at_debug,
+ `endif
+`endif
+    // From external devices
+`ifdef CFG_INTERRUPTS_ENABLED
+    interrupt,
+`endif
+    // From user logic
+`ifdef CFG_USER_ENABLED
+    user_result,
+    user_complete,
+`endif
+`ifdef CFG_IWB_ENABLED
+    // Instruction Wishbone master
+    I_DAT_I,
+    I_ACK_I,
+    I_ERR_I,
+    I_RTY_I,
+`endif
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+    ext_break,
+`endif
+    // Data Wishbone master
+    D_DAT_I,
+    D_ACK_I,
+    D_ERR_I,
+    D_RTY_I,
+    // ----- Outputs -------
+`ifdef CFG_USER_ENABLED
+    user_valid,
+    user_opcode,
+    user_operand_0,
+    user_operand_1,
+`endif
+`ifdef CFG_IWB_ENABLED
+    // Instruction Wishbone master
+    I_DAT_O,
+    I_ADR_O,
+    I_CYC_O,
+    I_SEL_O,
+    I_STB_O,
+    I_WE_O,
+    I_CTI_O,
+    I_LOCK_O,
+    I_BTE_O,
+`endif
+    // Data Wishbone master
+    D_DAT_O,
+    D_ADR_O,
+    D_CYC_O,
+    D_SEL_O,
+    D_STB_O,
+    D_WE_O,
+    D_CTI_O,
+    D_LOCK_O,
+    D_BTE_O
+    );
+
+/////////////////////////////////////////////////////
+// Inputs
+/////////////////////////////////////////////////////
+
+input clk_i;                                    // Clock
+input rst_i;                                    // Reset
+
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+   input at_debug;                              // GPIO input that maps EBA to DEBA
+ `endif
+`endif
+
+`ifdef CFG_INTERRUPTS_ENABLED
+input [`LM32_INTERRUPT_RNG] interrupt;          // Interrupt pins
+`endif
+
+`ifdef CFG_USER_ENABLED
+input [`LM32_WORD_RNG] user_result;             // User-defined instruction result
+input user_complete;                            // Indicates the user-defined instruction result is valid
+`endif
+
+`ifdef CFG_IWB_ENABLED
+input [`LM32_WORD_RNG] I_DAT_I;                 // Instruction Wishbone interface read data
+input I_ACK_I;                                  // Instruction Wishbone interface acknowledgement
+input I_ERR_I;                                  // Instruction Wishbone interface error
+input I_RTY_I;                                  // Instruction Wishbone interface retry
+`endif
+
+input [`LM32_WORD_RNG] D_DAT_I;                 // Data Wishbone interface read data
+input D_ACK_I;                                  // Data Wishbone interface acknowledgement
+input D_ERR_I;                                  // Data Wishbone interface error
+input D_RTY_I;                                  // Data Wishbone interface retry
+
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+input ext_break;                                // Passed straight through to the cpu instance
+`endif
+
+/////////////////////////////////////////////////////
+// Outputs
+/////////////////////////////////////////////////////
+
+`ifdef CFG_USER_ENABLED
+output user_valid;                              // Indicates that user_opcode and user_operand_* are valid
+wire   user_valid;
+output [`LM32_USER_OPCODE_RNG] user_opcode;     // User-defined instruction opcode
+wire   [`LM32_USER_OPCODE_RNG] user_opcode;     // Must be a net: its only driver is the cpu instance output port
+output [`LM32_WORD_RNG] user_operand_0;         // First operand for user-defined instruction
+wire   [`LM32_WORD_RNG] user_operand_0;
+output [`LM32_WORD_RNG] user_operand_1;         // Second operand for user-defined instruction
+wire   [`LM32_WORD_RNG] user_operand_1;
+`endif
+
+`ifdef CFG_IWB_ENABLED
+output [`LM32_WORD_RNG] I_DAT_O;                // Instruction Wishbone interface write data
+wire   [`LM32_WORD_RNG] I_DAT_O;
+output [`LM32_WORD_RNG] I_ADR_O;                // Instruction Wishbone interface address
+wire   [`LM32_WORD_RNG] I_ADR_O;
+output I_CYC_O;                                 // Instruction Wishbone interface cycle
+wire   I_CYC_O;
+output [`LM32_BYTE_SELECT_RNG] I_SEL_O;         // Instruction Wishbone interface byte select
+wire   [`LM32_BYTE_SELECT_RNG] I_SEL_O;
+output I_STB_O;                                 // Instruction Wishbone interface strobe
+wire   I_STB_O;
+output I_WE_O;                                  // Instruction Wishbone interface write enable
+wire   I_WE_O;
+output [`LM32_CTYPE_RNG] I_CTI_O;               // Instruction Wishbone interface cycle type
+wire   [`LM32_CTYPE_RNG] I_CTI_O;
+output I_LOCK_O;                                // Instruction Wishbone interface lock bus
+wire   I_LOCK_O;
+output [`LM32_BTYPE_RNG] I_BTE_O;               // Instruction Wishbone interface burst type
+wire   [`LM32_BTYPE_RNG] I_BTE_O;
+`endif
+
+output [`LM32_WORD_RNG] D_DAT_O;                // Data Wishbone interface write data
+wire   [`LM32_WORD_RNG] D_DAT_O;
+output [`LM32_WORD_RNG] D_ADR_O;                // Data Wishbone interface address
+wire   [`LM32_WORD_RNG] D_ADR_O;
+output D_CYC_O;                                 // Data Wishbone interface cycle
+wire   D_CYC_O;
+output [`LM32_BYTE_SELECT_RNG] D_SEL_O;         // Data Wishbone interface byte select
+wire   [`LM32_BYTE_SELECT_RNG] D_SEL_O;
+output D_STB_O;                                 // Data Wishbone interface strobe
+wire   D_STB_O;
+output D_WE_O;                                  // Data Wishbone interface write enable
+wire   D_WE_O;
+output [`LM32_CTYPE_RNG] D_CTI_O;               // Data Wishbone interface cycle type
+wire   [`LM32_CTYPE_RNG] D_CTI_O;
+output D_LOCK_O;                                // Data Wishbone interface lock bus
+wire   D_LOCK_O;
+output [`LM32_BTYPE_RNG] D_BTE_O;               // Data Wishbone interface burst type
+wire   [`LM32_BTYPE_RNG] D_BTE_O;
+
+/////////////////////////////////////////////////////
+// Internal nets and registers
+/////////////////////////////////////////////////////
+`ifdef CFG_JTAG_ENABLED
+// Signals between JTAG interface and CPU
+wire [`LM32_BYTE_RNG] jtag_reg_d;
+wire [`LM32_BYTE_RNG] jtag_reg_q;
+wire jtag_update;
+wire [2:0] jtag_reg_addr_d;
+wire [2:0] jtag_reg_addr_q;
+wire jtck;
+wire jrstn;                                     // Driven by jtag_cores; not consumed inside this module
+`endif
+
+// TODO: get the trace signals out
+`ifdef CFG_TRACE_ENABLED
+// PC trace signals
+wire [`LM32_PC_RNG] trace_pc;                   // PC to trace (address of next non-sequential instruction)
+wire trace_pc_valid;                            // Indicates that a new trace PC is valid
+wire trace_exception;                           // Indicates an exception has occurred
+wire [`LM32_EID_RNG] trace_eid;                 // Indicates what type of exception has occurred
+wire trace_eret;                                // Indicates an eret instruction has been executed
+`ifdef CFG_DEBUG_ENABLED
+wire trace_bret;                                // Indicates a bret instruction has been executed
+`endif
+`endif
+
+/////////////////////////////////////////////////////
+// Functions
+/////////////////////////////////////////////////////
+
+`include "lm32_functions.v"
+/////////////////////////////////////////////////////
+// Instantiations
+/////////////////////////////////////////////////////
+
+// LM32 CPU
+lm32_cpu cpu (
+    // ----- Inputs -------
+    .clk_i                 (clk_i),
+`ifdef CFG_EBR_NEGEDGE_REGISTER_FILE
+    .clk_n_i               (clk_n),             // NOTE(review): no declaration or driver for clk_n is visible in this module - confirm
+`endif
+    .rst_i                 (rst_i),
+`ifdef CFG_DEBUG_ENABLED
+ `ifdef CFG_ALTERNATE_EBA
+    .at_debug              (at_debug),
+ `endif
+`endif
+    // From external devices
+`ifdef CFG_INTERRUPTS_ENABLED
+    .interrupt             (interrupt),
+`endif
+    // From user logic
+`ifdef CFG_USER_ENABLED
+    .user_result           (user_result),
+    .user_complete         (user_complete),
+`endif
+`ifdef CFG_JTAG_ENABLED
+    // From JTAG
+    .jtag_clk              (jtck),
+    .jtag_update           (jtag_update),
+    .jtag_reg_q            (jtag_reg_q),
+    .jtag_reg_addr_q       (jtag_reg_addr_q),
+`endif
+`ifdef CFG_EXTERNAL_BREAK_ENABLED
+    .ext_break             (ext_break),
+`endif
+`ifdef CFG_IWB_ENABLED
+     // Instruction Wishbone master
+    .I_DAT_I               (I_DAT_I),
+    .I_ACK_I               (I_ACK_I),
+    .I_ERR_I               (I_ERR_I),
+    .I_RTY_I               (I_RTY_I),
+`endif
+    // Data Wishbone master
+    .D_DAT_I               (D_DAT_I),
+    .D_ACK_I               (D_ACK_I),
+    .D_ERR_I               (D_ERR_I),
+    .D_RTY_I               (D_RTY_I),
+    // ----- Outputs -------
+`ifdef CFG_TRACE_ENABLED
+    .trace_pc              (trace_pc),
+    .trace_pc_valid        (trace_pc_valid),
+    .trace_exception       (trace_exception),
+    .trace_eid             (trace_eid),
+    .trace_eret            (trace_eret),
+`ifdef CFG_DEBUG_ENABLED
+    .trace_bret            (trace_bret),
+`endif
+`endif
+`ifdef CFG_JTAG_ENABLED
+    .jtag_reg_d            (jtag_reg_d),
+    .jtag_reg_addr_d       (jtag_reg_addr_d),
+`endif
+`ifdef CFG_USER_ENABLED
+    .user_valid            (user_valid),
+    .user_opcode           (user_opcode),
+    .user_operand_0        (user_operand_0),
+    .user_operand_1        (user_operand_1),
+`endif
+`ifdef CFG_IWB_ENABLED
+    // Instruction Wishbone master
+    .I_DAT_O               (I_DAT_O),
+    .I_ADR_O               (I_ADR_O),
+    .I_CYC_O               (I_CYC_O),
+    .I_SEL_O               (I_SEL_O),
+    .I_STB_O               (I_STB_O),
+    .I_WE_O                (I_WE_O),
+    .I_CTI_O               (I_CTI_O),
+    .I_LOCK_O              (I_LOCK_O),
+    .I_BTE_O               (I_BTE_O),
+`endif
+    // Data Wishbone master
+    .D_DAT_O               (D_DAT_O),
+    .D_ADR_O               (D_ADR_O),
+    .D_CYC_O               (D_CYC_O),
+    .D_SEL_O               (D_SEL_O),
+    .D_STB_O               (D_STB_O),
+    .D_WE_O                (D_WE_O),
+    .D_CTI_O               (D_CTI_O),
+    .D_LOCK_O              (D_LOCK_O),
+    .D_BTE_O               (D_BTE_O)
+    );
+
+`ifdef CFG_JTAG_ENABLED
+// JTAG cores
+jtag_cores jtag_cores (
+    // ----- Inputs -----
+    .reg_d                 (jtag_reg_d),
+    .reg_addr_d            (jtag_reg_addr_d),
+    // ----- Outputs -----
+    .reg_update            (jtag_update),
+    .reg_q                 (jtag_reg_q),
+    .reg_addr_q            (jtag_reg_addr_q),
+    .jtck                  (jtck),
+    .jrstn                 (jrstn)
+    );
+`endif
+
+endmodule
diff --git a/verilog/uart/uart.v b/verilog/uart/uart.v
new file mode 100644 (file)
index 0000000..6412804
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Milkymist SoC
+ * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+module uart #(
+       parameter csr_addr = 4'h0,                /* compared with csr_a[13:10] to select this core */
+       parameter clk_freq = 100000000,           /* with baud, sets default_divisor = clk_freq/baud/16 */
+       parameter baud = 115200,
+       parameter break_en_default = 1'b0         /* reset value of break_en */
+) (
+       input sys_clk,
+       input sys_rst,                            /* synchronous reset, sampled on posedge sys_clk */
+
+       input [13:0] csr_a,
+       input csr_we,
+       input [31:0] csr_di,
+       output reg [31:0] csr_do,                 /* registered; zero unless a known register of this core is addressed */
+
+       output irq,                               /* (tx_event & tx_irq_en) | (rx_event & rx_irq_en) */
+
+       input uart_rx,
+       output uart_tx,                           /* uart_rx echoed back when thru_en is set, else transceiver output */
+
+       output break                              /* transceiver break, gated by break_en */
+);
+
+reg [15:0] divisor;
+wire [7:0] rx_data;
+wire [7:0] tx_data;
+wire tx_wr, rx_done, tx_done;                    /* rx_done/tx_done were implicit nets; now explicit */
+wire uart_tx_transceiver, break_transceiver;
+
+/* CSR-backed state, declared ahead of the assigns below that read it */
+reg thru_en;
+reg break_en;
+reg tx_irq_en;
+reg rx_irq_en;
+reg rx_event;
+reg tx_event;
+reg thre;
+
+uart_transceiver transceiver(
+       .sys_clk(sys_clk),
+       .sys_rst(sys_rst),
+
+       .uart_rx(uart_rx),
+       .uart_tx(uart_tx_transceiver),
+
+       .divisor(divisor),
+
+       .rx_data(rx_data),
+       .rx_done(rx_done),
+
+       .tx_data(tx_data),
+       .tx_wr(tx_wr),
+       .tx_done(tx_done),
+
+       .break(break_transceiver)
+);
+
+assign uart_tx = thru_en ? uart_rx : uart_tx_transceiver;
+assign break = break_en & break_transceiver;
+
+/* CSR interface */
+wire csr_selected = csr_a[13:10] == csr_addr;
+
+assign irq = (tx_event & tx_irq_en) | (rx_event & rx_irq_en);
+
+assign tx_data = csr_di[7:0];
+assign tx_wr = csr_selected & csr_we & (csr_a[2:0] == 3'b000);   /* a write to register 0 starts a transmission */
+
+parameter default_divisor = clk_freq/baud/16;
+
+always @(posedge sys_clk) begin
+       if(sys_rst) begin
+               divisor <= default_divisor;
+               csr_do <= 32'd0;
+               thru_en <= 1'b0;
+               break_en <= break_en_default;
+               rx_irq_en <= 1'b0;
+               tx_irq_en <= 1'b0;
+               tx_event <= 1'b0;
+               rx_event <= 1'b0;
+               thre <= 1'b1;
+       end else begin
+               csr_do <= 32'd0;                  /* default; overridden below when selected */
+               if(break)
+                       break_en <= 1'b0;         /* disarm after a break until register 4 is written again */
+               if(tx_done) begin
+                       tx_event <= 1'b1;
+                       thre <= 1'b1;
+               end
+               if(tx_wr)
+                       thre <= 1'b0;             /* later assignment: wins over tx_done in the same cycle */
+               if(rx_done) begin
+                       rx_event <= 1'b1;
+               end
+               if(csr_selected) begin
+                       case(csr_a[2:0])
+                               3'b000: csr_do <= rx_data;                            /* last received byte */
+                               3'b001: csr_do <= divisor;
+                               3'b010: csr_do <= {tx_event, rx_event, thre};         /* bit2, bit1, bit0 */
+                               3'b011: csr_do <= {thru_en, tx_irq_en, rx_irq_en};    /* bit2, bit1, bit0 */
+                               3'b100: csr_do <= {break_en};
+                       endcase
+                       if(csr_we) begin
+                               case(csr_a[2:0])
+                                       3'b000:; /* handled by transceiver */
+                                       3'b001: divisor <= csr_di[15:0];
+                                       3'b010: begin
+                                               /* write one to clear; placed last, so a clear wins over a set in the same cycle */
+                                               if(csr_di[1])
+                                                       rx_event <= 1'b0;
+                                               if(csr_di[2])
+                                                       tx_event <= 1'b0;
+                                       end
+                                       3'b011: begin
+                                               rx_irq_en <= csr_di[0];
+                                               tx_irq_en <= csr_di[1];
+                                               thru_en <= csr_di[2];
+                                       end
+                                       3'b100: break_en <= csr_di[0];
+                               endcase
+                       end
+               end
+       end
+end
+
+endmodule
diff --git a/verilog/uart/uart_transceiver.v b/verilog/uart/uart_transceiver.v
new file mode 100644 (file)
index 0000000..80bd93b
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * Milkymist SoC
+ * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
+ * Copyright (C) 2007 Das Labor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+module uart_transceiver(                        // Serial receiver and transmitter clocked by sys_clk, paced by a divisor-derived tick
+       input sys_rst,                           // synchronous reset for the counters and state below
+       input sys_clk,
+
+       input uart_rx,
+       output reg uart_tx,                      // driven high on reset and after each frame
+
+       input [15:0] divisor,                    // enable16 ticks once every `divisor` sys_clk cycles
+
+       output reg [7:0] rx_data,                // updated only when a frame ends with a high stop bit
+       output reg rx_done,                      // one-cycle pulse alongside the rx_data update
+
+       input [7:0] tx_data,
+       input tx_wr,                             // loads tx_data and starts a frame, even if one is in progress
+       output reg tx_done,                      // one-cycle pulse when a transmitted frame completes
+
+       output reg break                         // one-cycle pulse: stop bit low and all data bits zero
+);
+
+//-----------------------------------------------------------------
+// enable16 generator
+//-----------------------------------------------------------------
+reg [15:0] enable16_counter;
+
+wire enable16;
+assign enable16 = (enable16_counter == 16'd0);
+
+always @(posedge sys_clk) begin
+       if(sys_rst)
+               enable16_counter <= divisor - 16'b1;
+       else begin
+               enable16_counter <= enable16_counter - 16'd1;
+               if(enable16)                     // reload overrides the decrement above
+                       enable16_counter <= divisor - 16'b1;
+       end
+end
+
+//-----------------------------------------------------------------
+// Synchronize uart_rx
+//-----------------------------------------------------------------
+reg uart_rx1;
+reg uart_rx2;
+
+always @(posedge sys_clk) begin                  // two register stages; the RX logic below only reads uart_rx2
+       uart_rx1 <= uart_rx;
+       uart_rx2 <= uart_rx1;
+end
+
+//-----------------------------------------------------------------
+// UART RX Logic
+//-----------------------------------------------------------------
+reg rx_busy;
+reg uart_rx_r;                                   // uart_rx2 as seen on the previous enable16 tick
+reg [3:0] rx_count16;
+reg [3:0] rx_bitcount;
+reg [7:0] rx_reg;                                // shift register; not cleared by reset
+
+always @(posedge sys_clk) begin
+       if(sys_rst) begin
+               rx_done <= 1'b0;
+               rx_busy <= 1'b0;
+               rx_count16  <= 4'd0;
+               rx_bitcount <= 4'd0;
+               break <= 1'b0;
+               uart_rx_r <= 1'b0;
+       end else begin
+               rx_done <= 1'b0;                 // defaults: both strobes last a single sys_clk cycle
+               break <= 1'b0;
+
+               if(enable16) begin
+                       uart_rx_r <= uart_rx2;
+                       if(~rx_busy) begin // look for start bit
+                               if(~uart_rx2 & uart_rx_r) begin // start bit found
+                                       rx_busy <= 1'b1;
+                                       rx_count16 <= 4'd7;      // first sample happens once this 4-bit counter has wrapped to 0
+                                       rx_bitcount <= 4'd0;
+                               end
+                       end else begin
+                               rx_count16 <= rx_count16 + 4'd1;
+
+                               if(rx_count16 == 4'd0) begin // sample
+                                       rx_bitcount <= rx_bitcount + 4'd1;
+
+                                       if(rx_bitcount == 4'd0) begin // verify startbit
+                                               if(uart_rx2)     // line is back high: abandon the frame
+                                                       rx_busy <= 1'b0;
+                                       end else if(rx_bitcount == 4'd9) begin
+                                               rx_busy <= 1'b0;
+                                               if(uart_rx2) begin // stop bit ok
+                                                       rx_data <= rx_reg;
+                                                       rx_done <= 1'b1;
+                                               end else if(rx_reg == 8'h00) // break condition
+                                                       break <= 1'b1;
+                                       end else
+                                               rx_reg <= {uart_rx2, rx_reg[7:1]};   // bitcount 1..8: first received bit ends up in rx_reg[0]
+                               end
+                       end
+               end
+       end
+end
+
+//-----------------------------------------------------------------
+// UART TX Logic
+//-----------------------------------------------------------------
+reg tx_busy;
+reg [3:0] tx_bitcount;                           // not cleared by reset; loaded on tx_wr
+reg [3:0] tx_count16;                            // not cleared by reset; loaded on tx_wr
+reg [7:0] tx_reg;
+
+always @(posedge sys_clk) begin
+       if(sys_rst) begin
+               tx_done <= 1'b0;
+               tx_busy <= 1'b0;
+               uart_tx <= 1'b1;
+       end else begin
+               tx_done <= 1'b0;                 // default: tx_done lasts a single sys_clk cycle
+               if(tx_wr) begin                  // takes priority over the shifting branch; no tx_busy check
+                       tx_reg <= tx_data;
+                       tx_bitcount <= 4'd0;
+                       tx_count16 <= 4'd1;
+                       tx_busy <= 1'b1;
+                       uart_tx <= 1'b0;         // start bit is driven immediately
+`ifdef SIMULATION
+                       $display("UART: %c", tx_data);
+`endif
+               end else if(enable16 && tx_busy) begin
+                       tx_count16  <= tx_count16 + 4'd1;
+
+                       if(tx_count16 == 4'd0) begin     // once per 16 enable16 ticks (4-bit counter wrap)
+                               tx_bitcount <= tx_bitcount + 4'd1;
+                               
+                               if(tx_bitcount == 4'd8) begin            // stop bit
+                                       uart_tx <= 1'b1;
+                               end else if(tx_bitcount == 4'd9) begin   // frame finished
+                                       uart_tx <= 1'b1;
+                                       tx_busy <= 1'b0;
+                                       tx_done <= 1'b1;
+                               end else begin                           // bitcount 0..7: data bits, tx_reg[0] first
+                                       uart_tx <= tx_reg[0];
+                                       tx_reg <= {1'b0, tx_reg[7:1]};
+                               end
+                       end
+               end
+       end
+end
+
+endmodule