* https://bugs.libre-soc.org/show_bug.cgi?id=216
* https://libre-soc.org/3d_gpu/architecture/memory_and_cache/
+* https://bugs.libre-soc.org/show_bug.cgi?id=465 - exception handling
"""
from nmigen.compat.sim import run_simulation, Settle
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Mux, Elaboratable, Array, Cat
+from nmigen.cli import rtlil
+from nmigen import Module, Signal, Mux, Elaboratable, Cat, Const
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
-from nmigen.hdl.rec import Record, Layout
from nmutil.latch import SRLatch, latchregister
-from soc.decoder.power_decoder2 import Data
-from soc.decoder.power_enums import InternalOp
-from soc.regfile.regfile import ortreereduce
-from nmutil.util import treereduce
-
-from soc.decoder.power_decoder2 import Data
-#from nmutil.picker import PriorityPicker
-from nmigen.lib.coding import PriorityEncoder
-from soc.scoreboard.addr_split import LDSTSplitter
+from nmutil.util import rising_edge
+from openpower.decoder.power_decoder2 import Data
+from openpower.decoder.power_enums import MSRSpec
from soc.scoreboard.addr_match import LenExpand
+from soc.experiment.mem_types import LDSTException
# for testing purposes
-from soc.experiment.testmem import TestMemory # TODO: replace with TMLSUI
-# TODO: from soc.experiment.testmem import TestMemoryLoadStoreUnit
+from soc.experiment.testmem import TestMemory
+#from soc.scoreboard.addr_split import LDSTSplitter
+from nmutil.util import Display
import unittest
for the L0 Cache/Buffer to have an additional address latch
(because the LDSTCompUnit already has it)
- * addr_ok_o (or addr_exc_o) must be waited for. these will
+ * addr_ok_o (or exception.happened) must be waited for. these will
be asserted *only* for one cycle and one cycle only.
- * addr_exc_o will be asserted if there is no chance that the
+ * exception.happened will be asserted if there is no chance that the
memory request may be fulfilled.
- busy_o is deasserted on the same cycle as addr_exc_o is asserted.
+ busy_o is deasserted on the same cycle as exception.happened is asserted.
* conversely: addr_ok_o must *ONLY* be asserted if there is a
HUNDRED PERCENT guarantee that the memory request will be
busy_o is deasserted on the cycle AFTER st.ok is asserted.
"""
- def __init__(self, name=None, regwid=64, addrwid=48):
+ def __init__(self, name=None, regwid=64, addrwid=64):
self._regwid = regwid
self._addrwid = addrwid
RecordObject.__init__(self, name=name)
- # distinguish op type (ld/st)
- self.is_ld_i = Signal(reset_less=True)
- self.is_st_i = Signal(reset_less=True)
+ # distinguish op type (ld/st/dcbz/nc)
+ self.is_ld_i = Signal(reset_less=True)
+ self.is_st_i = Signal(reset_less=True)
+ self.is_dcbz_i = Signal(reset_less=True) # cache-line zeroing
+ self.is_nc = Signal() # no cacheing
# LD/ST data length (TODO: other things may be needed)
self.data_len = Signal(4, reset_less=True)
+ # atomic reservation (LR/SC - ldarx / stdcx etc.)
+ self.reserve = Signal(reset_less=True)
+
# common signals
self.busy_o = Signal(reset_less=True) # do not use if busy
self.go_die_i = Signal(reset_less=True) # back to reset
self.addr = Data(addrwid, "addr_i") # addr/addr-ok
# addr is valid (TLB, L1 etc.)
self.addr_ok_o = Signal(reset_less=True)
- self.addr_exc_o = Signal(reset_less=True) # TODO, "type" of exception
+ self.exc_o = LDSTException("exc")
# LD/ST
self.ld = Data(regwid, "ld_data_o") # ok to be set by L0 Cache/Buf
self.st = Data(regwid, "st_data_i") # ok to be set by CompUnit
-
-
-class LDSTPort(Elaboratable):
- def __init__(self, idx, regwid=64, addrwid=48):
- self.pi = PortInterface("ldst_port%d" % idx, regwid, addrwid)
-
- def elaborate(self, platform):
- m = Module()
- comb, sync = m.d.comb, m.d.sync
-
- # latches
- m.submodules.busy_l = busy_l = SRLatch(False, name="busy")
- m.submodules.cyc_l = cyc_l = SRLatch(True, name="cyc")
- comb += cyc_l.s.eq(0)
- comb += cyc_l.r.eq(0)
-
- # this is a little weird: we let the L0Cache/Buffer set
- # the outputs: this module just monitors "state".
-
- # LD/ST requested activates "busy"
- with m.If(self.pi.is_ld_i | self.pi.is_st_i):
- comb += busy_l.s.eq(1)
-
- # monitor for an exception or the completion of LD.
- with m.If(self.pi.addr_exc_o):
- comb += busy_l.r.eq(1)
-
- # however ST needs one cycle before busy is reset
- with m.If(self.pi.st.ok | self.pi.ld.ok):
- comb += cyc_l.s.eq(1)
-
- with m.If(cyc_l.q):
- comb += cyc_l.r.eq(1)
- comb += busy_l.r.eq(1)
-
- # busy latch outputs to interface
- comb += self.pi.busy_o.eq(busy_l.q)
-
- return m
-
- def __iter__(self):
- yield self.pi.is_ld_i
- yield self.pi.is_st_i
- yield from self.pi.op.ports()
- yield self.pi.busy_o
- yield self.pi.go_die_i
- yield from self.pi.addr.ports()
- yield self.pi.addr_ok_o
- yield self.pi.addr_exc_o
-
- yield from self.pi.ld.ports()
- yield from self.pi.st.ports()
-
- def ports(self):
- return list(self)
-
-
-class TestMemoryPortInterface(Elaboratable):
- """TestMemoryPortInterface
-
- This is a test class for simple verification of the LDSTCompUnit
- and for the simple core, to be able to run unit tests rapidly and
- with less other code in the way.
-
- Versions of this which are *compatible* (conform with PortInterface)
- will include augmented-Wishbone Bus versions, including ones that
- connect to L1, L2, MMU etc. etc. however this is the "base lowest
- possible version that complies with PortInterface".
+ self.store_done = Data(1, "store_done_o") # store has been actioned
+
+ #only priv_mode = not msr_pr is used currently
+ # TODO: connect signals
+ self.virt_mode = Signal() # ctrl.msr(MSR_DR);
+ self.priv_mode = Signal() # not ctrl.msr(MSR_PR);
+ self.mode_32bit = Signal() # not ctrl.msr(MSR_SF);
+
+ # dcache
+ self.ldst_error = Signal()
+ ## Signalling ld/st error - NC cache hit, TLB miss, prot/RC failure
+ self.cache_paradox = Signal()
+
+ # connect_port: build (and return, as a list) the assignments that join
+ # this PortInterface to another one, "inport".
+ # * request-side signals are copied from inport into self: is_ld_i,
+ # is_st_i, is_nc, is_dcbz_i, data_len, reserve, go_die_i, addr (data
+ # and ok), st, virt_mode, priv_mode and mode_32bit.
+ # * response-side signals are copied from self back into inport: ld,
+ # busy_o, addr_ok_o, exc_o, store_done, ldst_error and cache_paradox.
+ # nothing is added to a Module here: the caller must add the returned
+ # list to a domain itself (presumably comb - TODO confirm at call sites).
+ # NOTE(review): the print below is a debug trace emitted on every call.
+ def connect_port(self, inport):
+ print("connect_port", self, inport)
+ return [self.is_ld_i.eq(inport.is_ld_i),
+ self.is_st_i.eq(inport.is_st_i),
+ self.is_nc.eq(inport.is_nc),
+ self.is_dcbz_i.eq(inport.is_dcbz_i),
+ self.data_len.eq(inport.data_len),
+ self.reserve.eq(inport.reserve),
+ self.go_die_i.eq(inport.go_die_i),
+ self.addr.data.eq(inport.addr.data),
+ self.addr.ok.eq(inport.addr.ok),
+ self.st.eq(inport.st),
+ self.virt_mode.eq(inport.virt_mode),
+ self.priv_mode.eq(inport.priv_mode),
+ self.mode_32bit.eq(inport.mode_32bit),
+ inport.ld.eq(self.ld),
+ inport.busy_o.eq(self.busy_o),
+ inport.addr_ok_o.eq(self.addr_ok_o),
+ inport.exc_o.eq(self.exc_o),
+ inport.store_done.eq(self.store_done),
+ inport.ldst_error.eq(self.ldst_error),
+ inport.cache_paradox.eq(self.cache_paradox)
+ ]
+
+
+class PortInterfaceBase(Elaboratable):
+ """PortInterfaceBase
+
+ Base class for PortInterface-compliant Memory read/writers
"""
def __init__(self, regwid=64, addrwid=4):
- self.mem = TestMemory(regwid, addrwid, granularity=regwid//8)
self.regwid = regwid
self.addrwid = addrwid
- self.pi = LDSTPort(0, regwid, addrwid)
+ self.pi = PortInterface("ldst_port0", regwid, addrwid)
@property
def addrbits(self):
- return log2_int(self.mem.regwid//8)
+ return log2_int(self.regwid//8)
def splitaddr(self, addr):
"""split the address into top and bottom bits of the memory granularity
"""
return addr[:self.addrbits], addr[self.addrbits:]
+ def connect_port(self, inport):
+ return self.pi.connect_port(inport)
+
+ # memory-access hooks, called from elaborate(). the versions here do
+ # nothing: a subclass is expected to override them
+ # (TestMemoryPortInterface overrides all four).
+ #
+ # set_wr_addr: called in ST mode while pi.addr.ok is set. receives the
+ # full address, the LenExpand lexp_o mask, the misalign flag, the
+ # MSRSpec built in elaborate(), and the port's is_dcbz_i / is_nc flags.
+ def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): pass
+ # set_rd_addr: called in LD mode when pi.addr.ok is set and the address
+ # has not yet been acknowledged. same arguments as set_wr_addr, minus
+ # is_dcbz.
+ def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): pass
+ # set_wr_data: called in ST mode when pi.st.ok is set, with the shifted
+ # store data and the lexp_o mask as write-enable. elaborate() assigns
+ # the return value to reset_l.s, so an override must return something
+ # assignable there (this stub returns None).
+ def set_wr_data(self, m, data, wen): pass
+ # get_rd_data: elaborate() unpacks the result as "data, ldok", so an
+ # override must return a 2-tuple (this stub returns None, which cannot
+ # be unpacked).
+ def get_rd_data(self, m): pass
+
def elaborate(self, platform):
m = Module()
comb, sync = m.d.comb, m.d.sync
- # add TestMemory as submodule
- m.submodules.mem = self.mem
-
- # connect the ports as modules
- m.submodules.port0 = self.pi
-
# state-machine latches
m.submodules.st_active = st_active = SRLatch(False, name="st_active")
+ m.submodules.st_done = st_done = SRLatch(False, name="st_done")
m.submodules.ld_active = ld_active = SRLatch(False, name="ld_active")
m.submodules.reset_l = reset_l = SRLatch(True, name="reset")
m.submodules.adrok_l = adrok_l = SRLatch(False, name="addr_acked")
+ m.submodules.busy_l = busy_l = SRLatch(False, name="busy")
+ m.submodules.cyc_l = cyc_l = SRLatch(True, name="cyc")
+
+ self.busy_l = busy_l
+
+ sync += st_done.s.eq(0)
+ comb += st_done.r.eq(0)
+ comb += st_active.r.eq(0)
+ comb += ld_active.r.eq(0)
+ comb += cyc_l.s.eq(0)
+ comb += cyc_l.r.eq(0)
+ comb += busy_l.s.eq(0)
+ comb += busy_l.r.eq(0)
+ sync += adrok_l.s.eq(0)
+ comb += adrok_l.r.eq(0)
# expand ld/st binary length/addr[:3] into unary bitmap
m.submodules.lenexp = lenexp = LenExpand(4, 8)
lds = Signal(reset_less=True)
sts = Signal(reset_less=True)
- pi = self.pi.pi
- comb += lds.eq(pi.is_ld_i & pi.busy_o) # ld-req signals
- comb += sts.eq(pi.is_st_i & pi.busy_o) # st-req signals
-
- # convenience variables to reference the "picked" port
- ldport = pi
- stport = pi
- # and the memory ports
- rdport = self.mem.rdport
- wrport = self.mem.wrport
-
- # Priority-Pickers pick one and only one request, capture its index.
- # from that point on this code *only* "listens" to that port.
-
- sync += adrok_l.s.eq(0)
- comb += adrok_l.r.eq(0)
- with m.If(lds):
- comb += ld_active.s.eq(1) # activate LD mode
- with m.Elif(sts):
- comb += st_active.s.eq(1) # activate ST mode
-
- # from this point onwards, with the port "picked", it stays picked
- # until ld_active (or st_active) are de-asserted.
+ pi = self.pi
+ comb += lds.eq(pi.is_ld_i) # ld-req signals
+ comb += sts.eq(pi.is_st_i) # st-req signals
+
+ # gather the mode bits into an MSRSpec: this (rather than pr on its
+ # own) is what is passed to self.set_rd_addr and self.set_wr_addr below
+ pr = ~pi.priv_mode
+ dr = pi.virt_mode
+ sf = ~pi.mode_32bit
+ msr = MSRSpec(pr=pr, dr=dr, sf=sf)
+
+ # detect busy "edge"
+ busy_delay = Signal()
+ busy_edge = Signal()
+ sync += busy_delay.eq(pi.busy_o)
+ comb += busy_edge.eq(pi.busy_o & ~busy_delay)
+
+ # misalignment detection: bits at end of lenexpand are set.
+ # when using the L0CacheBuffer "data expander" which splits requests
+ # into *two* PortInterfaces, this acts as a "safety check".
+ misalign = Signal()
+ comb += misalign.eq(lenexp.lexp_o[8:].bool())
+
+ # activate mode: only on "edge"
+ comb += ld_active.s.eq(rising_edge(m, lds)) # activate LD mode
+ comb += st_active.s.eq(rising_edge(m, sts)) # activate ST mode
+
+ # LD/ST requested activates "busy" (only if not already busy)
+ with m.If(self.pi.is_ld_i | self.pi.is_st_i):
+ comb += busy_l.s.eq(~busy_delay)
+ with m.If(self.pi.exc_o.happened):
+ sync += Display("fast exception")
# if now in "LD" mode: wait for addr_ok, then send the address out
# to memory, acknowledge address, and send out LD data
with m.If(ld_active.q):
# set up LenExpander with the LD len and lower bits of addr
- lsbaddr, msbaddr = self.splitaddr(ldport.addr.data)
- comb += lenexp.len_i.eq(ldport.data_len)
+ lsbaddr, msbaddr = self.splitaddr(pi.addr.data)
+ comb += lenexp.len_i.eq(pi.data_len)
comb += lenexp.addr_i.eq(lsbaddr)
- with m.If(ldport.addr.ok & adrok_l.qn):
- comb += rdport.addr.eq(msbaddr) # addr ok, send thru
- comb += ldport.addr_ok_o.eq(1) # acknowledge addr ok
+ with m.If(pi.addr.ok & adrok_l.qn):
+ self.set_rd_addr(m, pi.addr.data, lenexp.lexp_o, misalign,
+ msr, pi.is_nc)
+ comb += pi.addr_ok_o.eq(1) # acknowledge addr ok
sync += adrok_l.s.eq(1) # and pull "ack" latch
# if now in "ST" mode: likewise do the same but with "ST"
# to memory, acknowledge address, and send out LD data
with m.If(st_active.q):
# set up LenExpander with the ST len and lower bits of addr
- lsbaddr, msbaddr = self.splitaddr(stport.addr.data)
- comb += lenexp.len_i.eq(stport.data_len)
+ lsbaddr, msbaddr = self.splitaddr(pi.addr.data)
+ comb += lenexp.len_i.eq(pi.data_len)
comb += lenexp.addr_i.eq(lsbaddr)
- with m.If(stport.addr.ok):
- comb += wrport.addr.eq(msbaddr) # addr ok, send thru
- with m.If(adrok_l.qn):
- comb += stport.addr_ok_o.eq(1) # acknowledge addr ok
+ with m.If(pi.addr.ok):
+ self.set_wr_addr(m, pi.addr.data, lenexp.lexp_o, misalign, msr,
+ pi.is_dcbz_i, pi.is_nc)
+ # acknowledge the store address only if it has not been acknowledged
+ # yet (adrok_l.qn) AND no exception has been flagged. the comparison
+ # must be parenthesised: Python binds "&" tighter than "==", so the
+ # unbracketed form evaluated (adrok_l.qn & happened) == 0, which is
+ # also true once the address has already been acknowledged.
+ with m.If(adrok_l.qn & (self.pi.exc_o.happened == 0)):
+ comb += pi.addr_ok_o.eq(1) # acknowledge addr ok
sync += adrok_l.s.eq(1) # and pull "ack" latch
- # NOTE: in both these, below, the port itself takes care
- # of de-asserting its "busy_o" signal, based on either ld.ok going
- # high (by us, here) or by st.ok going high (by the LDSTCompUnit).
-
# for LD mode, when addr has been "ok'd", assume that (because this
# is a "Memory" test-class) the memory read data is valid.
comb += reset_l.s.eq(0)
comb += reset_l.r.eq(0)
+ lddata = Signal(self.regwid, reset_less=True)
+ data, ldok = self.get_rd_data(m)
+ comb += lddata.eq((data & lenexp.rexp_o) >>
+ (lenexp.addr_i*8))
with m.If(ld_active.q & adrok_l.q):
# shift data down before pushing out. requires masking
# from the *byte*-expanded version of LenExpand output
- lddata = Signal(self.regwid, reset_less=True)
- # TODO: replace rdport.data with LoadStoreUnitInterface.x_load_data
- # and also handle the ready/stall/busy protocol
- comb += lddata.eq((rdport.data & lenexp.rexp_o) >>
- (lenexp.addr_i*8))
- comb += ldport.ld.data.eq(lddata) # put data out
- comb += ldport.ld.ok.eq(1) # indicate data valid
- comb += reset_l.s.eq(1) # reset mode after 1 cycle
+ comb += pi.ld.data.eq(lddata) # put data out
+ comb += pi.ld.ok.eq(ldok) # indicate data valid
+ comb += reset_l.s.eq(ldok) # reset mode after 1 cycle
# for ST mode, when addr has been "ok'd", wait for incoming "ST ok"
- with m.If(st_active.q & stport.st.ok):
+ sync += st_done.s.eq(0) # store done trigger
+ with m.If(st_active.q & pi.st.ok):
# shift data up before storing. lenexp *bit* version of mask is
# passed straight through as byte-level "write-enable" lines.
- stdata = Signal(self.regwid, reset_less=True)
- comb += stdata.eq(stport.st.data << (lenexp.addr_i*8))
+ stdata = Signal(self.regwid*2, reset_less=True)
+ comb += stdata.eq(pi.st.data << (lenexp.addr_i*8))
# TODO: replace with link to LoadStoreUnitInterface.x_store_data
# and also handle the ready/stall/busy protocol
- comb += wrport.data.eq(stdata) # write st to mem
- comb += wrport.en.eq(lenexp.lexp_o) # enable writes
- comb += reset_l.s.eq(1) # reset mode after 1 cycle
+ stok = self.set_wr_data(m, stdata, lenexp.lexp_o)
+ sync += st_done.s.eq(~self.pi.exc_o.happened) # store done trigger
+ with m.If(st_done.q):
+ comb += reset_l.s.eq(stok) # reset mode after 1 cycle
# ugly hack, due to simultaneous addr req-go acknowledge
reset_delay = Signal(reset_less=True)
# after waiting one cycle (reset_l is "sync" mode), reset the port
with m.If(reset_l.q):
- comb += ld_active.r.eq(1) # leave the ST active for 1 cycle
+ comb += ld_active.r.eq(1) # leave the LD active for 1 cycle
comb += st_active.r.eq(1) # leave the ST active for 1 cycle
comb += reset_l.r.eq(1) # clear reset
comb += adrok_l.r.eq(1) # address reset
+ comb += st_done.r.eq(1) # store done reset
- return m
-
- def ports(self):
- for p in self.dports:
- yield from p.ports()
-
-
-def wait_busy(port, no=False):
- while True:
- busy = yield port.pi.busy_o
- print("busy", no, busy)
- if bool(busy) == no:
- break
- yield
-
-
-def wait_addr(port):
- while True:
- addr_ok = yield port.pi.addr_ok_o
- print("addrok", addr_ok)
- if not addr_ok:
- break
- yield
-
-
-def wait_ldok(port):
- while True:
- ldok = yield port.pi.ld.ok
- print("ldok", ldok)
- if ldok:
- break
- yield
-
-
-def l0_cache_st(dut, addr, data, datalen):
- mem = dut.mem
- port1 = dut.pi
-
- # have to wait until not busy
- yield from wait_busy(port1, no=False) # wait until not busy
-
- # set up a ST on the port. address first:
- yield port1.pi.is_st_i.eq(1) # indicate ST
- yield port1.pi.data_len.eq(datalen) # ST length (1/2/4/8)
-
- yield port1.pi.addr.data.eq(addr) # set address
- yield port1.pi.addr.ok.eq(1) # set ok
- yield from wait_addr(port1) # wait until addr ok
- # yield # not needed, just for checking
- # yield # not needed, just for checking
- # assert "ST" for one cycle (required by the API)
- yield port1.pi.st.data.eq(data)
- yield port1.pi.st.ok.eq(1)
- yield
- yield port1.pi.st.ok.eq(0)
-
- # can go straight to reset.
- yield port1.pi.is_st_i.eq(0) # end
- yield port1.pi.addr.ok.eq(0) # set !ok
- # yield from wait_busy(port1, False) # wait until not busy
-
-
-def l0_cache_ld(dut, addr, datalen, expected):
+ # monitor for an exception, clear busy immediately
+ with m.If(self.pi.exc_o.happened):
+ comb += busy_l.r.eq(1)
+ comb += reset_l.s.eq(1) # also reset whole unit
- mem = dut.mem
- port1 = dut.pi
+ # however ST needs one cycle before busy is reset
+ #with m.If(self.pi.st.ok | self.pi.ld.ok):
+ with m.If(reset_l.s):
+ comb += cyc_l.s.eq(1)
- # have to wait until not busy
- yield from wait_busy(port1, no=False) # wait until not busy
+ with m.If(cyc_l.q):
+ comb += cyc_l.r.eq(1)
+ comb += busy_l.r.eq(1)
- # set up a LD on the port. address first:
- yield port1.pi.is_ld_i.eq(1) # indicate LD
- yield port1.pi.data_len.eq(datalen) # LD length (1/2/4/8)
+ # busy latch outputs to interface
+ if hasattr(self, "external_busy"):
+ # when there is an extra (external) busy, include that here.
+ # this is used e.g. in LoadStore1 when an instruction fault
+ # is being processed (instr_fault) and stops Load/Store requests
+ # from being made until it's done
+ comb += pi.busy_o.eq(busy_l.q | self.external_busy(m))
+ else:
+ comb += pi.busy_o.eq(busy_l.q)
- yield port1.pi.addr.data.eq(addr) # set address
- yield port1.pi.addr.ok.eq(1) # set ok
- yield from wait_addr(port1) # wait until addr ok
+ return m
- yield from wait_ldok(port1) # wait until ld ok
- data = yield port1.pi.ld.data
+ def ports(self):
+ yield from self.pi.ports()
- # cleanup
- yield port1.pi.is_ld_i.eq(0) # end
- yield port1.pi.addr.ok.eq(0) # set !ok
- # yield from wait_busy(port1, no=False) # wait until not busy
- return data
+class TestMemoryPortInterface(PortInterfaceBase):
+ """TestMemoryPortInterface
+ This is a test class for simple verification of the LDSTCompUnit
+ and for the simple core, to be able to run unit tests rapidly and
+ with less other code in the way.
-def l0_cache_ldst(arg, dut):
- yield
- addr = 0x2
- data = 0xbeef
- data2 = 0xf00f
- #data = 0x4
- yield from l0_cache_st(dut, 0x2, data, 2)
- yield from l0_cache_st(dut, 0x4, data2, 2)
- result = yield from l0_cache_ld(dut, 0x2, 2, data)
- result2 = yield from l0_cache_ld(dut, 0x4, 2, data2)
- yield
- arg.assertEqual(data, result, "data %x != %x" % (result, data))
- arg.assertEqual(data2, result2, "data2 %x != %x" % (result2, data2))
+ Versions of this which are *compatible* (conform with PortInterface)
+ will include augmented-Wishbone Bus versions, including ones that
+ connect to L1, L2, MMU etc. etc. however this is the "base lowest
+ possible version that complies with PortInterface".
+ """
+ def __init__(self, regwid=64, addrwid=4):
+ super().__init__(regwid, addrwid)
+ # hard-code memory addressing width to 5 bits (the value passed below)
+ self.mem = TestMemory(regwid, 5, granularity=regwid//8, init=False)
+ def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc):
+ lsbaddr, msbaddr = self.splitaddr(addr)
+ m.d.comb += self.mem.wrport.addr.eq(msbaddr)
-class TestPIMem(unittest.TestCase):
+ def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc):
+ lsbaddr, msbaddr = self.splitaddr(addr)
+ m.d.comb += self.mem.rdport.addr.eq(msbaddr)
- def test_pi_mem(self):
+ def set_wr_data(self, m, data, wen):
+ m.d.comb += self.mem.wrport.data.eq(data) # write st to mem
+ m.d.comb += self.mem.wrport.en.eq(wen) # enable writes
+ return Const(1, 1)
- dut = TestMemoryPortInterface(regwid=64)
- #vl = rtlil.convert(dut, ports=dut.ports())
- #with open("test_basic_l0_cache.il", "w") as f:
- # f.write(vl)
+ def get_rd_data(self, m):
+ return self.mem.rdport.data, Const(1, 1)
- run_simulation(dut, l0_cache_ldst(self, dut),
- vcd_name='test_pi_mem_basic.vcd')
+ def elaborate(self, platform):
+ m = super().elaborate(platform)
+ # add TestMemory as submodule
+ m.submodules.mem = self.mem
-if __name__ == '__main__':
- unittest.main(exit=False)
+ return m
+ def ports(self):
+ yield from super().ports()
+ # TODO: memory ports