# src/soc/experiment/pimem.py
#
# Provenance (from the commit this file was introduced in):
#   Author:  Luke Kenneth Casson Leighton
#   Date:    Mon, 22 Jun 2020 12:45:11 +0000 (+0100)
#   Commit:  e9dbf557cd483d74755aa7257fac54651229aaae (soc.git,
#            git.libre-soc.org), X-Git-Tag: div_pipeline~296
#   Subject: add TestMemoryPortInterface class which is designed to replace
#            L0CacheBuffer in unit tests, allowing L0CacheBuffer to be
#            developed on its own terms
#
# Companion change in the same commit, src/soc/experiment/l0_cache.py:
# the PortInterface class is removed from that file and replaced by
#   from soc.experiment.pimem import PortInterface
# (the class now lives here, see below).

"""L0 Cache/Buffer

This first version is intended for prototyping and test purposes:
it has "direct" access to Memory.

The intention is that this version remains an integral part of the
test infrastructure, and, just as with minerva's memory arrangement,
a dynamic runtime config *selects* alternative memory arrangements
rather than *replaces and discards* this code.

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=216
* https://libre-soc.org/3d_gpu/architecture/memory_and_cache/

"""

from nmigen.compat.sim import run_simulation, Settle
from nmigen.cli import verilog, rtlil
from nmigen import Module, Signal, Mux, Elaboratable, Array, Cat
from nmutil.iocontrol import RecordObject
from nmigen.utils import log2_int
from nmigen.hdl.rec import Record, Layout

from nmutil.latch import SRLatch, latchregister
from soc.decoder.power_decoder2 import Data
from soc.decoder.power_enums import InternalOp
from soc.regfile.regfile import ortreereduce
from nmutil.util import treereduce

from soc.decoder.power_decoder2 import Data
#from nmutil.picker import PriorityPicker
from nmigen.lib.coding import PriorityEncoder
from soc.scoreboard.addr_split import LDSTSplitter
from soc.scoreboard.addr_match import LenExpand

# for testing purposes
from soc.experiment.testmem import TestMemory  # TODO: replace with TMLSUI
# TODO: from soc.experiment.testmem import TestMemoryLoadStoreUnit

import unittest


class PortInterface(RecordObject):
    """PortInterface

    defines the interface - the API - that the LDSTCompUnit connects
    to.  note that this is NOT a "fire-and-forget" interface.  the
    LDSTCompUnit *must* be kept apprised that the request is in
    progress, and only when it has a 100% successful completion
    can the notification be given (busy dropped).

    The interface FSM rules are as follows:

    * if busy_o is asserted, a LD/ST is in progress.  further
      requests may not be made until busy_o is deasserted.

    * only one of is_ld_i or is_st_i may be asserted.  busy_o
      will immediately be asserted and remain asserted.

    * addr.ok is to be asserted when the LD/ST address is known.
      addr.data is to be valid on the same cycle.

      addr.ok and addr.data must REMAIN asserted until busy_o
      is de-asserted.  this ensures that there is no need
      for the L0 Cache/Buffer to have an additional address latch
      (because the LDSTCompUnit already has it)

    * addr_ok_o (or addr_exc_o) must be waited for.  these will
      be asserted *only* for one cycle and one cycle only.

    * addr_exc_o will be asserted if there is no chance that the
      memory request may be fulfilled.

      busy_o is deasserted on the same cycle as addr_exc_o is asserted.

    * conversely: addr_ok_o must *ONLY* be asserted if there is a
      HUNDRED PERCENT guarantee that the memory request will be
      fulfilled.

    * for a LD, ld.ok will be asserted - for only one clock cycle -
      at any point in the future that is acceptable to the underlying
      Memory subsystem.  the recipient MUST latch ld.data on that cycle.

      busy_o is deasserted on the same cycle as ld.ok is asserted.

    * for a ST, st.ok may be asserted only after addr_ok_o had been
      asserted, alongside valid st.data at the same time.  st.ok
      must only be asserted for one cycle.

      the underlying Memory is REQUIRED to pick up that data and
      guarantee its delivery.  no back-acknowledgement is required.

      busy_o is deasserted on the cycle AFTER st.ok is asserted.
    """

    def __init__(self, name=None, regwid=64, addrwid=48):
        """Create the interface signals.

        name    -- record name, passed to RecordObject
        regwid  -- width in bits of the ld/st data fields
        addrwid -- width in bits of the address field
        """

        self._regwid = regwid
        self._addrwid = addrwid

        RecordObject.__init__(self, name=name)

        # distinguish op type (ld/st)
        self.is_ld_i = Signal(reset_less=True)
        self.is_st_i = Signal(reset_less=True)

        # LD/ST data length (TODO: other things may be needed)
        self.data_len = Signal(4, reset_less=True)

        # common signals
        self.busy_o = Signal(reset_less=True)     # do not use if busy
        self.go_die_i = Signal(reset_less=True)   # back to reset
        self.addr = Data(addrwid, "addr_i")       # addr/addr-ok
        # addr is valid (TLB, L1 etc.)
        self.addr_ok_o = Signal(reset_less=True)
        self.addr_exc_o = Signal(reset_less=True)  # TODO, "type" of exception

        # LD/ST
        self.ld = Data(regwid, "ld_data_o")  # ok to be set by L0 Cache/Buf
        self.st = Data(regwid, "st_data_i")  # ok to be set by CompUnit


class LDSTPort(Elaboratable):
    """Wraps one PortInterface and generates its busy_o signal.

    The module does not drive the interface outputs other than busy_o:
    it only monitors the request/completion signals and tracks "busy"
    state from them.
    """

    def __init__(self, idx, regwid=64, addrwid=48):
        """idx is used only to name the record ("ldst_port<idx>");
        regwid and addrwid are passed straight to PortInterface.
        """
        self.pi = PortInterface("ldst_port%d" % idx, regwid, addrwid)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # latches
        m.submodules.busy_l = busy_l = SRLatch(False, name="busy")
        m.submodules.cyc_l = cyc_l = SRLatch(True, name="cyc")
        comb += cyc_l.s.eq(0)
        comb += cyc_l.r.eq(0)

        # this is a little weird: we let the L0Cache/Buffer set
        # the outputs: this module just monitors "state".

        # LD/ST requested activates "busy"
        with m.If(self.pi.is_ld_i | self.pi.is_st_i):
            comb += busy_l.s.eq(1)

        # monitor for an exception or the completion of LD.
        with m.If(self.pi.addr_exc_o):
            comb += busy_l.r.eq(1)

        # however ST needs one cycle before busy is reset
        with m.If(self.pi.st.ok | self.pi.ld.ok):
            comb += cyc_l.s.eq(1)

        # cyc_l self-clears and drops "busy" once it has been set
        with m.If(cyc_l.q):
            comb += cyc_l.r.eq(1)
            comb += busy_l.r.eq(1)

        # busy latch outputs to interface
        comb += self.pi.busy_o.eq(busy_l.q)

        return m

    def __iter__(self):
        """Yield every signal of the wrapped PortInterface."""
        yield self.pi.is_ld_i
        yield self.pi.is_st_i
        # PortInterface has no "op" record: the only remaining request
        # field it declares is data_len, so expose that instead.
        yield self.pi.data_len
        yield self.pi.busy_o
        yield self.pi.go_die_i
        yield from self.pi.addr.ports()
        yield self.pi.addr_ok_o
        yield self.pi.addr_exc_o

        yield from self.pi.ld.ports()
        yield from self.pi.st.ports()

    def ports(self):
        """Return the interface signals as a list."""
        return list(self)


class TestMemoryPortInterface(Elaboratable):
    """TestMemoryPortInterface

    This is a test class for simple verification of the LDSTCompUnit
    and for the simple core, to be able to run unit tests rapidly and
    with less other code in the way.

    Versions of this which are *compatible* (conform with PortInterface)
    will include augmented-Wishbone Bus versions, including ones that
    connect to L1, L2, MMU etc. etc. however this is the "base lowest
    possible version that complies with PortInterface".
    """

    def __init__(self, regwid=64, addrwid=4):
        """Create the backing TestMemory and a single LDSTPort (index 0).

        regwid and addrwid are handed to both TestMemory and LDSTPort;
        the memory is created with granularity=regwid//8.
        """
        self.mem = TestMemory(regwid, addrwid, granularity=regwid//8)
        self.regwid = regwid
        self.addrwid = addrwid
        self.pi = LDSTPort(0, regwid, addrwid)

    @property
    def addrbits(self):
        """Number of low address bits: log2 of (memory regwid // 8)."""
        return log2_int(self.mem.regwid//8)

    def splitaddr(self, addr):
        """split the address into top and bottom bits of the memory granularity

        returns (low bits, remaining high bits)
        """
        return addr[:self.addrbits], addr[self.addrbits:]

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # add TestMemory as submodule
        m.submodules.mem = self.mem

        # connect the ports as modules
        m.submodules.port0 = self.pi

        # state-machine latches
        m.submodules.st_active = st_active = SRLatch(False, name="st_active")
        m.submodules.ld_active = ld_active = SRLatch(False, name="ld_active")
        m.submodules.reset_l = reset_l = SRLatch(True, name="reset")
        m.submodules.adrok_l = adrok_l = SRLatch(False, name="addr_acked")

        # expand ld/st binary length/addr[:3] into unary bitmap
        m.submodules.lenexp = lenexp = LenExpand(4, 8)

        lds = Signal(reset_less=True)
        sts = Signal(reset_less=True)
        pi = self.pi.pi
        comb += lds.eq(pi.is_ld_i & pi.busy_o)  # ld-req signals
        comb += sts.eq(pi.is_st_i & pi.busy_o)  # st-req signals

        # convenience variables to reference the "picked" port
        # (there is only one port here, so both refer to the same record)
        ldport = pi
        stport = pi
        # and the memory ports
        rdport = self.mem.rdport
        wrport = self.mem.wrport

        # Priority-Pickers pick one and only one request, capture its index.
        # from that point on this code *only* "listens" to that port.

        sync += adrok_l.s.eq(0)
        comb += adrok_l.r.eq(0)
        with m.If(lds):
            comb += ld_active.s.eq(1)  # activate LD mode
        with m.Elif(sts):
            comb += st_active.s.eq(1)  # activate ST mode

        # from this point onwards, with the port "picked", it stays picked
        # until ld_active (or st_active) are de-asserted.

        # if now in "LD" mode: wait for addr_ok, then send the address out
        # to memory, acknowledge address, and send out LD data
        with m.If(ld_active.q):
            # set up LenExpander with the LD len and lower bits of addr
            lsbaddr, msbaddr = self.splitaddr(ldport.addr.data)
            comb += lenexp.len_i.eq(ldport.data_len)
            comb += lenexp.addr_i.eq(lsbaddr)
            with m.If(ldport.addr.ok & adrok_l.qn):
                comb += rdport.addr.eq(msbaddr)   # addr ok, send thru
                comb += ldport.addr_ok_o.eq(1)    # acknowledge addr ok
                sync += adrok_l.s.eq(1)           # and pull "ack" latch

        # if now in "ST" mode: likewise do the same but with "ST":
        # send the address out to memory and acknowledge it.  the store
        # data itself is handled further down, when st.ok arrives.
        with m.If(st_active.q):
            # set up LenExpander with the ST len and lower bits of addr
            lsbaddr, msbaddr = self.splitaddr(stport.addr.data)
            comb += lenexp.len_i.eq(stport.data_len)
            comb += lenexp.addr_i.eq(lsbaddr)
            with m.If(stport.addr.ok):
                comb += wrport.addr.eq(msbaddr)   # addr ok, send thru
                with m.If(adrok_l.qn):
                    comb += stport.addr_ok_o.eq(1)  # acknowledge addr ok
                    sync += adrok_l.s.eq(1)         # and pull "ack" latch

        # NOTE: in both these, below, the port itself takes care
        # of de-asserting its "busy_o" signal, based on either ld.ok going
        # high (by us, here) or by st.ok going high (by the LDSTCompUnit).

        # for LD mode, when addr has been "ok'd", assume that (because this
        # is a "Memory" test-class) the memory read data is valid.
        comb += reset_l.s.eq(0)
        comb += reset_l.r.eq(0)
        with m.If(ld_active.q & adrok_l.q):
            # shift data down before pushing out.  requires masking
            # from the *byte*-expanded version of LenExpand output
            lddata = Signal(self.regwid, reset_less=True)
            # TODO: replace rdport.data with LoadStoreUnitInterface.x_load_data
            # and also handle the ready/stall/busy protocol
            comb += lddata.eq((rdport.data & lenexp.rexp_o) >>
                              (lenexp.addr_i*8))
            comb += ldport.ld.data.eq(lddata)  # put data out
            comb += ldport.ld.ok.eq(1)         # indicate data valid
            comb += reset_l.s.eq(1)            # reset mode after 1 cycle

        # for ST mode, when addr has been "ok'd", wait for incoming "ST ok"
        with m.If(st_active.q & stport.st.ok):
            # shift data up before storing.  lenexp *bit* version of mask is
            # passed straight through as byte-level "write-enable" lines.
            stdata = Signal(self.regwid, reset_less=True)
            comb += stdata.eq(stport.st.data << (lenexp.addr_i*8))
            # TODO: replace with link to LoadStoreUnitInterface.x_store_data
            # and also handle the ready/stall/busy protocol
            comb += wrport.data.eq(stdata)       # write st to mem
            comb += wrport.en.eq(lenexp.lexp_o)  # enable writes
            comb += reset_l.s.eq(1)              # reset mode after 1 cycle

        # ugly hack, due to simultaneous addr req-go acknowledge
        reset_delay = Signal(reset_less=True)
        sync += reset_delay.eq(reset_l.q)
        with m.If(reset_delay):
            comb += adrok_l.r.eq(1)     # address reset

        # after waiting one cycle (reset_l is "sync" mode), reset the port
        with m.If(reset_l.q):
            comb += ld_active.r.eq(1)   # leave the LD active for 1 cycle
            comb += st_active.r.eq(1)   # leave the ST active for 1 cycle
            comb += reset_l.r.eq(1)     # clear reset
            comb += adrok_l.r.eq(1)     # address reset

        return m

    def ports(self):
        """Yield the signals of the (single) LDSTPort held in self.pi."""
        # this class has exactly one port (self.pi); there is no list of
        # ports to iterate over.
        yield from self.pi.ports()


def wait_busy(port, no=False):
    """Simulation helper: step the clock until bool(busy_o) == no.

    With the default no=False this returns once busy_o reads as 0.
    """
    while True:
        busy = yield port.pi.busy_o
        print("busy", no, busy)
        if bool(busy) == no:
            break
        yield


def wait_addr(port):
    """Simulation helper: step the clock while addr_ok_o reads as 1.

    NOTE(review): callers describe this as "wait until addr ok", but the
    loop exits as soon as addr_ok_o reads as *0*, which looks inverted.
    Behaviour is left exactly as committed because changing it would alter
    the timing of the testbench - confirm against the simulation.
    """
    while True:
        addr_ok = yield port.pi.addr_ok_o
        print("addrok", addr_ok)
        if not addr_ok:
            break
        yield


def wait_ldok(port):
    """Simulation helper: step the clock until ld.ok reads as 1."""
    while True:
        ldok = yield port.pi.ld.ok
        print("ldok", ldok)
        if ldok:
            break
        yield


def l0_cache_st(dut, addr, data, datalen):
    """Simulation helper: issue one ST of `data` (datalen bytes) to `addr`.

    dut is expected to expose an LDSTPort as dut.pi.
    """
    port1 = dut.pi

    # have to wait until not busy
    yield from wait_busy(port1, no=False)    # wait until not busy

    # set up a ST on the port.  address first:
    yield port1.pi.is_st_i.eq(1)             # indicate ST
    yield port1.pi.data_len.eq(datalen)      # ST length (1/2/4/8)

    yield port1.pi.addr.data.eq(addr)        # set address
    yield port1.pi.addr.ok.eq(1)             # set ok
    yield from wait_addr(port1)              # wait until addr ok
    # yield # not needed, just for checking
    # yield # not needed, just for checking
    # assert "ST" for one cycle (required by the API)
    yield port1.pi.st.data.eq(data)
    yield port1.pi.st.ok.eq(1)
    yield
    yield port1.pi.st.ok.eq(0)

    # can go straight to reset.
    yield port1.pi.is_st_i.eq(0)             # end
    yield port1.pi.addr.ok.eq(0)             # set !ok
    # yield from wait_busy(port1, False)    # wait until not busy


def l0_cache_ld(dut, addr, datalen, expected):
    """Simulation helper: issue one LD of datalen bytes from `addr`.

    Returns the value read from ld.data on the cycle ld.ok is seen.
    `expected` is accepted for the caller's convenience but is not used
    here: the caller performs the comparison.
    """
    port1 = dut.pi

    # have to wait until not busy
    yield from wait_busy(port1, no=False)    # wait until not busy

    # set up a LD on the port.  address first:
    yield port1.pi.is_ld_i.eq(1)             # indicate LD
    yield port1.pi.data_len.eq(datalen)      # LD length (1/2/4/8)

    yield port1.pi.addr.data.eq(addr)        # set address
    yield port1.pi.addr.ok.eq(1)             # set ok
    yield from wait_addr(port1)              # wait until addr ok

    yield from wait_ldok(port1)              # wait until ld ok
    data = yield port1.pi.ld.data

    # cleanup
    yield port1.pi.is_ld_i.eq(0)             # end
    yield port1.pi.addr.ok.eq(0)             # set !ok
    # yield from wait_busy(port1, no=False)    # wait until not busy

    return data


def l0_cache_ldst(arg, dut):
    """Testbench: store two 2-byte values, load them back, compare.

    arg is the unittest.TestCase used for the assertions.
    """
    yield
    addr = 0x2
    addr2 = 0x4
    data = 0xbeef
    data2 = 0xf00f
    #data = 0x4
    yield from l0_cache_st(dut, addr, data, 2)
    yield from l0_cache_st(dut, addr2, data2, 2)
    result = yield from l0_cache_ld(dut, addr, 2, data)
    result2 = yield from l0_cache_ld(dut, addr2, 2, data2)
    yield
    arg.assertEqual(data, result, "data %x != %x" % (result, data))
    arg.assertEqual(data2, result2, "data2 %x != %x" % (result2, data2))


class TestPIMem(unittest.TestCase):
    """Runs the ld/st testbench against TestMemoryPortInterface."""

    def test_pi_mem(self):

        dut = TestMemoryPortInterface(regwid=64)
        #vl = rtlil.convert(dut, ports=dut.ports())
        #with open("test_basic_l0_cache.il", "w") as f:
        #    f.write(vl)

        run_simulation(dut, l0_cache_ldst(self, dut),
                       vcd_name='test_pi_mem_basic.vcd')


if __name__ == '__main__':
    unittest.main(exit=False)