From ff6742035f643135729e19c531f0aa9d86934972 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 23 Feb 2022 17:17:50 +0000 Subject: [PATCH] forgot to pass cix (cache-inhibited) through to LD/ST which was causing D-Cache to make too many read/writes and overrun wishbone bus addressing on peripherals. D-Cache always (for non-cache-inhibited) makes 8 64-bit requests: if a memory-mapped peripheral has only say 3 32-bit CSRs then the additional requests beyond the range of the peripheral will cause a permanent lock-up --- src/soc/experiment/compldst_multi.py | 1 + src/soc/experiment/pi2ls.py | 4 ++-- src/soc/experiment/pimem.py | 13 +++++++------ src/soc/experiment/test/test_l0_cache_buffer2.py | 4 ++-- src/soc/experiment/test/test_mmu_dcache_pi.py | 4 ++-- src/soc/fu/ldst/loadstore.py | 6 ++++-- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/soc/experiment/compldst_multi.py b/src/soc/experiment/compldst_multi.py index d548f90c..094fbc6d 100644 --- a/src/soc/experiment/compldst_multi.py +++ b/src/soc/experiment/compldst_multi.py @@ -557,6 +557,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable): # connect to LD/ST PortInterface. comb += pi.is_ld_i.eq(op_is_ld & busy_o) # decoded-LD + comb += pi.is_nc.eq(op_is_cix & busy_o) # cache-inhibited comb += pi.is_st_i.eq(op_is_st_or_dcbz & busy_o) # decoded-ST comb += pi.is_dcbz_i.eq(op_is_dcbz & busy_o) # decoded-DCBZ comb += pi.reserve.eq(oper_r.reserve & busy_o) # atomic LR/SC diff --git a/src/soc/experiment/pi2ls.py b/src/soc/experiment/pi2ls.py index a792e184..023f4758 100644 --- a/src/soc/experiment/pi2ls.py +++ b/src/soc/experiment/pi2ls.py @@ -46,13 +46,13 @@ class Pi2LSUI(PortInterfaceBase): self.lsui_busy = Signal() self.valid_l = SRLatch(False, name="valid") - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): print("pi2lsui TODO, implement is_dcbz") m.d.comb += self.valid_l.s.eq(1) m.d.comb += self.lsui.x_mask_i.eq(mask) m.d.comb += self.lsui.x_addr_i.eq(addr) - def set_rd_addr(self, m, addr, mask, misalign, msr): + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): m.d.comb += self.valid_l.s.eq(1) m.d.comb += self.lsui.x_mask_i.eq(mask) m.d.comb += self.lsui.x_addr_i.eq(addr) diff --git a/src/soc/experiment/pimem.py b/src/soc/experiment/pimem.py index 498ecb8f..93db9d6e 100644 --- a/src/soc/experiment/pimem.py +++ b/src/soc/experiment/pimem.py @@ -181,8 +181,8 @@ class PortInterfaceBase(Elaboratable): def connect_port(self, inport): return self.pi.connect_port(inport) - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): pass - def set_rd_addr(self, m, addr, mask, misalign, msr): pass + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): pass + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): pass def set_wr_data(self, m, data, wen): pass def get_rd_data(self, m): pass @@ -258,7 +258,8 @@ class PortInterfaceBase(Elaboratable): comb += lenexp.len_i.eq(pi.data_len) comb += lenexp.addr_i.eq(lsbaddr) with m.If(pi.addr.ok & adrok_l.qn): - self.set_rd_addr(m, pi.addr.data, lenexp.lexp_o, misalign, msr) + self.set_rd_addr(m, pi.addr.data, lenexp.lexp_o, misalign, + msr, pi.is_nc) comb += pi.addr_ok_o.eq(1) # acknowledge addr ok sync += adrok_l.s.eq(1) # and pull "ack" latch @@ -271,7 +272,7 @@ class PortInterfaceBase(Elaboratable): comb += lenexp.addr_i.eq(lsbaddr) with m.If(pi.addr.ok): self.set_wr_addr(m, pi.addr.data, lenexp.lexp_o, misalign, msr, - pi.is_dcbz_i) + pi.is_dcbz_i, pi.is_nc) with m.If(adrok_l.qn & self.pi.exc_o.happened==0): comb += pi.addr_ok_o.eq(1) # acknowledge addr ok sync += adrok_l.s.eq(1) # and pull "ack" latch @@ -367,11 +368,11 @@ class TestMemoryPortInterface(PortInterfaceBase): # hard-code memory addressing width to 6 bits self.mem = TestMemory(regwid, 5, granularity=regwid//8, init=False) - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): lsbaddr, msbaddr = self.splitaddr(addr) m.d.comb += self.mem.wrport.addr.eq(msbaddr) - def set_rd_addr(self, m, addr, mask, misalign, msr): + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): lsbaddr, msbaddr = self.splitaddr(addr) m.d.comb += self.mem.rdport.addr.eq(msbaddr) diff --git a/src/soc/experiment/test/test_l0_cache_buffer2.py b/src/soc/experiment/test/test_l0_cache_buffer2.py index 066cf431..c331a7b5 100644 --- a/src/soc/experiment/test/test_l0_cache_buffer2.py +++ b/src/soc/experiment/test/test_l0_cache_buffer2.py @@ -25,10 +25,10 @@ class TestCachedMemoryPortInterface(PortInterfaceBase): super().__init__(regwid, addrwid) self.ldst = LDSTSplitter(32, 48, 4) - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): m.d.comb += self.ldst.addr_i.eq(addr) - def set_rd_addr(self, m, addr, mask, misalign, msr): + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): m.d.comb += self.ldst.addr_i.eq(addr) def set_wr_data(self, m, data, wen): diff --git a/src/soc/experiment/test/test_mmu_dcache_pi.py b/src/soc/experiment/test/test_mmu_dcache_pi.py index c2b76df5..338480d8 100644 --- a/src/soc/experiment/test/test_mmu_dcache_pi.py +++ b/src/soc/experiment/test/test_mmu_dcache_pi.py @@ -85,14 +85,14 @@ class TestMicrowattMemoryPortInterface(PortInterfaceBase): self.mmu = mmu self.dcache = dcache - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): m.d.comb += self.dcache.d_in.addr.eq(addr) m.d.comb += self.mmu.l_in.addr.eq(addr) m.d.comb += self.mmu.l_in.load.eq(0) m.d.comb += self.mmu.l_in.priv.eq(~msr.pr) # TODO verify m.d.comb += self.mmu.l_in.valid.eq(1) - def set_rd_addr(self, m, addr, mask, misalign, msr): + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): m.d.comb += self.dcache.d_in.addr.eq(addr) m.d.comb += self.mmu.l_in.addr.eq(addr) m.d.comb += self.mmu.l_in.load.eq(1) diff --git a/src/soc/fu/ldst/loadstore.py b/src/soc/fu/ldst/loadstore.py index 56c28c81..6e868b1e 100644 --- a/src/soc/fu/ldst/loadstore.py +++ b/src/soc/fu/ldst/loadstore.py @@ -149,7 +149,8 @@ class LoadStore1(PortInterfaceBase): def external_busy(self, m): return self.instr_fault | self.r_instr_fault - def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz): + def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz, is_nc): + m.d.comb += self.req.nc.eq(is_nc) m.d.comb += self.req.load.eq(0) # store operation m.d.comb += self.req.byte_sel.eq(mask) m.d.comb += self.req.raddr.eq(addr) @@ -179,7 +180,7 @@ class LoadStore1(PortInterfaceBase): return None - def set_rd_addr(self, m, addr, mask, misalign, msr): + def set_rd_addr(self, m, addr, mask, misalign, msr, is_nc): m.d.comb += self.d_valid.eq(1) m.d.comb += self.req.load.eq(1) # load operation m.d.comb += self.req.byte_sel.eq(mask) @@ -187,6 +188,7 @@ class LoadStore1(PortInterfaceBase): m.d.comb += self.req.priv_mode.eq(~msr.pr) # not-problem ==> priv m.d.comb += self.req.virt_mode.eq(msr.dr) # DR ==> virt m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit + m.d.comb += self.req.nc.eq(is_nc) # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx # this is for peripherals. same thing done in Microwatt loadstore1.vhdl with m.If(addr[28:] == Const(0xc, 4)): -- 2.30.2