From 67745ec03629b76547f08783d5978bccc464cce1 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 15 Aug 2020 22:54:50 +0100 Subject: [PATCH] rather big change to interaction between regfile and compunits on read. Regfiles are now sync-delayed by one clock from "ren". This means that a read-request has to be fired off and then excluded from the PriorityPicker whilst waiting for the output to arrive on the next clock. *Then* the "go read" signal can be fired, which gets the data (arriving 1 cycle late from the regfile) "in sync" with its "go read". --- src/soc/regfile/regfile.py | 58 ++++++++++++++++++++++-------- src/soc/simple/core.py | 21 +++++++---- src/soc/simple/issuer.py | 31 +++++++++------- src/soc/simple/test/test_core.py | 6 ++-- src/soc/simple/test/test_issuer.py | 16 +++++++++ 5 files changed, 96 insertions(+), 36 deletions(-) diff --git a/src/soc/regfile/regfile.py b/src/soc/regfile/regfile.py index e06695eb..10b3fa06 100644 --- a/src/soc/regfile/regfile.py +++ b/src/soc/regfile/regfile.py @@ -32,9 +32,10 @@ import operator class Register(Elaboratable): - def __init__(self, width, writethru=True): + def __init__(self, width, writethru=True, synced=True): self.width = width self.writethru = writethru + self.synced = synced self._rdports = [] self._wrports = [] @@ -56,22 +57,26 @@ class Register(Elaboratable): m = Module() self.reg = reg = Signal(self.width, name="reg") + if self.synced: + domain = m.d.sync + else: + domain = m.d.comb + # read ports. 
has write-through detection (returns data written) for rp in self._rdports: - with m.If(rp.ren == 1): + domain += rp.data_o.eq(0) + with m.If(rp.ren): if self.writethru: wr_detect = Signal(reset_less=False) m.d.comb += wr_detect.eq(0) for wp in self._wrports: with m.If(wp.wen): - m.d.comb += rp.data_o.eq(wp.data_i) + domain += rp.data_o.eq(wp.data_i) m.d.comb += wr_detect.eq(1) with m.If(~wr_detect): - m.d.comb += rp.data_o.eq(reg) + domain += rp.data_o.eq(reg) else: - m.d.comb += rp.data_o.eq(reg) - with m.Else(): - m.d.comb += rp.data_o.eq(0) + domain += rp.data_o.eq(reg) # write ports, delayed by 1 cycle for wp in self._wrports: @@ -101,10 +106,12 @@ class RegFileArray(Elaboratable): and read-en signals (per port). """ - def __init__(self, width, depth): + def __init__(self, width, depth, synced=True): + self.synced = synced self.width = width self.depth = depth - self.regs = Array(Register(width) for _ in range(self.depth)) + self.regs = Array(Register(width, synced=synced) \ + for _ in range(self.depth)) self._rdports = [] self._wrports = [] @@ -149,11 +156,22 @@ class RegFileArray(Elaboratable): for i, reg in enumerate(self.regs): setattr(m.submodules, "reg_%d" % i, reg) + if self.synced: + domain = m.d.sync + else: + domain = m.d.comb + for (regs, p) in self._rdports: #print (p) m.d.comb += self._get_en_sig(regs, 'ren').eq(p.ren) ror = ortreereduce(list(regs)) - m.d.comb += p.data_o.eq(ror) + if self.synced: + ren_delay = Signal.like(p.ren) + m.d.sync += ren_delay.eq(p.ren) + with m.If(ren_delay): + m.d.comb += p.data_o.eq(ror) + else: + m.d.comb += p.data_o.eq(ror) for (regs, p) in self._wrports: m.d.comb += self._get_en_sig(regs, 'wen').eq(p.wen) for r in regs: @@ -171,8 +189,9 @@ class RegFileArray(Elaboratable): class RegFileMem(Elaboratable): unary = False - def __init__(self, width, depth, fwd_bus_mode=False): + def __init__(self, width, depth, fwd_bus_mode=False, synced=True): self.fwd_bus_mode = fwd_bus_mode + self.synced = synced self.width, 
self.depth = width, depth self.memory = Memory(width=width, depth=depth) self._rdports = {} @@ -183,7 +202,11 @@ class RegFileMem(Elaboratable): port = RecordObject([("addr", bsz), ("ren", 1), ("data_o", self.width)], name=name) - self._rdports[name] = (port, self.memory.read_port(domain="comb")) + if self.synced: + domain = "sync" + else: + domain = "comb" + self._rdports[name] = (port, self.memory.read_port(domain=domain)) return port def write_port(self, name=None): @@ -215,7 +238,12 @@ class RegFileMem(Elaboratable): with m.If(~wr_detect): m.d.comb += rp.data_o.eq(rport.data) else: - with m.If(rp.ren): + if self.synced: + ren_delay = Signal.like(rp.ren) + m.d.sync += ren_delay.eq(rp.ren) + with m.If(ren_delay): + m.d.comb += rp.data_o.eq(rport.data) + else: m.d.comb += rp.data_o.eq(rport.data) # write ports, delayed by one cycle (in the memory itself) @@ -384,7 +412,7 @@ def test_regfile(): run_simulation(dut, regfile_sim(dut, rp, wp), vcd_name='test_regfile.vcd') - dut = RegFileMem(32, 8) + dut = RegFileMem(32, 8, True, False) rp = dut.read_port("rp1") wp = dut.write_port("wp1") vl = rtlil.convert(dut)#, ports=dut.ports()) @@ -393,7 +421,7 @@ def test_regfile(): run_simulation(dut, regfile_sim(dut, rp, wp), vcd_name='test_regmem.vcd') - dut = RegFileArray(32, 8) + dut = RegFileArray(32, 8, False) rp1 = dut.read_port("read1") rp2 = dut.read_port("read2") wp = dut.write_port("write") diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index 19f076ba..8e70feac 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -222,29 +222,38 @@ class NonProductionCore(Elaboratable): fu_active = fu_bitdict[funame] name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi) addr_en = Signal.like(reads[i], name="addr_en_"+name) - rp = Signal(name="rp_"+name) - pick = Signal() + pick = Signal(name="pick_"+name) # picker input + rp = Signal(name="rp_"+name) # picker output + delay_pick = Signal(name="dp_"+name) # read-enable "underway" - comb += 
pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i]) + # exclude any currently-enabled read-request (mask out active) + comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] & + ~delay_pick) comb += rdpick.i[pi].eq(pick) - sync += fu.go_rd_i[idx].eq(rising_edge(m, rp)) + comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick + # if picked, select read-port "reg select" number to port comb += rp.eq(rdpick.o[pi] & rdpick.en_o) + sync += delay_pick.eq(rp) # delayed "pick" comb += addr_en.eq(Mux(rp, reads[i], 0)) + + # the read-enable happens combinatorially (see mux-bus below) + # but it results in the data coming out on a one-cycle delay. if rfile.unary: rens.append(addr_en) else: addrs.append(addr_en) rens.append(rp) - with m.If(rp): + # use the *delayed* pick signal to put requested data onto bus + with m.If(delay_pick): # connect regfile port to input, creating fan-out Bus src = fu.src_i[idx] print("reg connect widths", regfile, regname, pi, funame, src.shape(), rport.data_o.shape()) # all FUs connect to same port - sync += src.eq(rport.data_o) + comb += src.eq(rport.data_o) # or-reduce the muxed read signals if rfile.unary: diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index 05156119..e6f837e4 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -135,20 +135,24 @@ class TestIssuer(Elaboratable): # read the PC pc = Signal(64, reset_less=True) + pc_ok_delay = Signal() + sync += pc_ok_delay.eq(~self.pc_i.ok) with m.If(self.pc_i.ok): # incoming override (start from pc_i) comb += pc.eq(self.pc_i.data) with m.Else(): - # otherwise read StateRegs regfile for PC + # otherwise read StateRegs regfile for PC... comb += self.state_r_pc.ren.eq(1<> (i*4)) & 0xf #cri = int('{:04b}'.format(cri)[::-1], 2) - print("cr reg", hex(cri), i, + print("setup cr reg", hex(cri), i, crregs.regs[i].reg.shape()) yield crregs.regs[i].reg.eq(cri) # set up XER. 
"direct" write (bypass rd/write ports) xregs = core.regs.xer - print("sprs", test.sprs) + print("setup sprs", test.sprs) xer = None if 'XER' in test.sprs: xer = test.sprs['XER'] diff --git a/src/soc/simple/test/test_issuer.py b/src/soc/simple/test/test_issuer.py index 3e4e236a..c4877940 100644 --- a/src/soc/simple/test/test_issuer.py +++ b/src/soc/simple/test/test_issuer.py @@ -247,6 +247,16 @@ class TestRunner(FHDLTestCase): terminated = yield issuer.dbg.terminated_o print("terminated", terminated) + if index >= len(instructions): + print ("index over, send dmi stop") + # stop at end + yield from set_dmi(dmi, DBGCore.CTRL, 1<