From a9e76d84c643cef88584e33ef9978642cdd23ce6 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Fri, 4 Mar 2022 14:58:28 +0000
Subject: [PATCH] add experimental stall-capable 64-to-32 wishbone converter
 based on microwatt soc.vhdl

---
 src/ls2.py | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 161 insertions(+), 6 deletions(-)

diff --git a/src/ls2.py b/src/ls2.py
index 444fb6a..e6cc8bf 100644
--- a/src/ls2.py
+++ b/src/ls2.py
@@ -7,7 +7,8 @@
 # Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER
 # under EU Grants 871528 and 957073, under the LGPLv3+ License
 
-from nmigen import (Module, Elaboratable, DomainRenamer, Record)
+from nmigen import (Module, Elaboratable, DomainRenamer, Record,
+                    Signal, Cat, Const)
 from nmigen.cli import verilog
 from nmigen.lib.cdc import ResetSynchronizer
 from nmigen_soc import wishbone, memory
@@ -75,6 +76,160 @@ def sim_ddr3_settings(clk_freq=100e6):
     )
 
 
+class WB64to32Convert(Elaboratable):
+    """Microwatt IO wishbone slave 64->32 bits converter
+
+    For timing reasons, this adds a one cycle latch on the way both
+    in and out. This relaxes timing and routing pressure on the "main"
+    memory bus by moving all simple IOs to a slower 32-bit bus.
+
+    This implementation is rather dumb at the moment, no stash buffer,
+    so we stall whenever that latch is busy. This can be improved.
+
+    Each master access is split into at most two 32-bit slave accesses:
+    the bottom word (master.sel[:4]) goes first if any of its byte
+    enables are set, the top word (master.sel[4:]) is sent with the LSB
+    of the slave address set.
+
+    * master: upstream bus. "stall" is optional and only driven when
+      the attribute exists.
+    * slave: downstream 32-bit bus. "stall" is optional, when absent
+      stb is cleared unconditionally in the wait-for-ack states.
+    """
+    def __init__(self, master, slave):
+        self.master = master
+        self.slave = slave
+
+    def elaborate(self, platform):
+        m = Module()
+        comb, sync = m.d.comb, m.d.sync
+        master, slave = self.master, self.slave
+
+        has_top = Signal()
+        has_bot = Signal()
+        # has_top as captured in IDLE, for use in WAIT_ACK_BOT
+        has_top_r = Signal()
+
+        # Do we have a top word and/or a bottom word ?
+        with m.If(master.cyc & master.stb):
+            comb += has_top.eq(master.sel[4:].bool())
+            comb += has_bot.eq(master.sel[:4].bool())
+
+        with m.FSM():
+            with m.State("IDLE"):
+                # Clear ACK in case it was set
+                sync += master.ack.eq(0)
+
+                # Do we have a cycle ?
+                with m.If(master.cyc & master.stb):
+                    # Stall master until we are done, we aren't (yet)
+                    # pipelining this, it's all slow IOs. Guarded the same
+                    # way as the un-stall below.
+                    if hasattr(master, "stall"):
+                        sync += master.stall.eq(1)
+
+                    # has_top is only driven while cyc & stb are high, so
+                    # record it now instead of relying on the master
+                    # still asserting stb when the bottom ack arrives.
+                    sync += has_top_r.eq(has_top)
+
+                    # Start cycle downstream
+                    sync += slave.cyc.eq(1)
+                    sync += slave.stb.eq(1)
+
+                    # Copy write enable to IO out, copy address as well,
+                    # LSB is set later based on HI/LO
+                    sync += slave.we.eq(master.we)
+                    sync += slave.adr.eq(Cat(0, master.adr))
+
+                    # If we have a bottom word, handle it first, otherwise
+                    # send the top word down. XXX Split the actual mux out
+                    # and only generate a control signal.
+                    with m.If(has_bot):
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[:32])
+                        sync += slave.sel.eq(master.sel[:4])
+
+                        # Wait for ack on BOTTOM half
+                        m.next = "WAIT_ACK_BOT"
+
+                    with m.Else():
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[32:])
+                        sync += slave.sel.eq(master.sel[4:])
+
+                        # Bump LSB of address
+                        sync += slave.adr[0].eq(1)
+
+                        # Wait for ack on TOP half
+                        m.next = "WAIT_ACK_TOP"
+
+            with m.State("WAIT_ACK_BOT"):
+                # If we aren't stalled by the device, clear stb
+                if hasattr(slave, "stall"):
+                    with m.If(~slave.stall):
+                        sync += slave.stb.eq(0)
+                else:
+                    sync += slave.stb.eq(0)
+
+                # Handle ack
+                with m.If(slave.ack):
+                    # If it's a read, latch the data
+                    with m.If(~slave.we):
+                        sync += master.dat_r[:32].eq(slave.dat_r)
+
+                    # Do we have a "top" part as well ?
+                    with m.If(has_top_r):
+                        # Latch data & sel
+                        with m.If(master.we):
+                            sync += slave.dat_w.eq(master.dat_w[32:])
+                        sync += slave.sel.eq(master.sel[4:])
+
+                        # Bump address and set STB
+                        sync += slave.adr[0].eq(1)
+                        sync += slave.stb.eq(1)
+
+                        # Wait for new ack
+                        m.next = "WAIT_ACK_TOP"
+
+                    with m.Else():
+                        # We are done, ack up, clear cyc downstream
+                        sync += slave.cyc.eq(0)
+
+                        # And ack & unstall upstream
+                        sync += master.ack.eq(1)
+                        if hasattr(master, "stall"):
+                            sync += master.stall.eq(0)
+
+                        # Wait for next one
+                        m.next = "IDLE"
+
+            with m.State("WAIT_ACK_TOP"):
+                # If we aren't stalled by the device, clear stb
+                if hasattr(slave, "stall"):
+                    with m.If(~slave.stall):
+                        sync += slave.stb.eq(0)
+                else:
+                    sync += slave.stb.eq(0)
+
+                # Handle ack
+                with m.If(slave.ack):
+                    # If it's a read, latch the data
+                    with m.If(~slave.we):
+                        sync += master.dat_r[32:].eq(slave.dat_r)
+
+                    # We are done, ack up, clear cyc downstream
+                    sync += slave.cyc.eq(0)
+
+                    # And ack & unstall upstream
+                    sync += master.ack.eq(1)
+                    if hasattr(master, "stall"):
+                        sync += master.stall.eq(0)
+
+                    # Wait for next one
+                    m.next = "IDLE"
+
+        return m
 
 
 class DDR3SoC(SoC, Elaboratable):
@@ -111,8 +266,8 @@ class DDR3SoC(SoC, Elaboratable):
                                      granularity=8)
         cvtibus = wishbone.Interface(addr_width=30, data_width=32,
                                      granularity=8)
-        self.dbusdowncvt = WishboneDownConvert(self.cpu.dbus, cvtdbus)
-        self.ibusdowncvt = WishboneDownConvert(self.cpu.ibus, cvtibus)
+        self.dbusdowncvt = WB64to32Convert(self.cpu.dbus, cvtdbus)
+        self.ibusdowncvt = WB64to32Convert(self.cpu.ibus, cvtibus)
         self._arbiter.add(cvtibus) # I-Cache Master
         self._arbiter.add(cvtdbus) # D-Cache Master. TODO JTAG master
 
@@ -246,9 +401,9 @@ class DDR3SoC(SoC, Elaboratable):
         m.submodules.dbuscvt = self.dbusdowncvt
         m.submodules.ibuscvt = self.ibusdowncvt
         # create stall sigs, assume wishbone classic
-        ibus, dbus = self.cpu.ibus, self.cpu.dbus
-        comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
-        comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
+        #ibus, dbus = self.cpu.ibus, self.cpu.dbus
+        #comb += ibus.stall.eq(ibus.stb & ~ibus.ack)
+        #comb += dbus.stall.eq(dbus.stb & ~dbus.ack)
 
         m.submodules.arbiter = self._arbiter
         m.submodules.decoder = self._decoder
-- 
2.30.2