X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fsimple%2Fissuer.py;h=15bd1760a5ab93f233d8cb7cdff813d7b0833096;hb=HEAD;hp=15642018b0e459836ae809e24ed1edc96a189af8;hpb=884253f76482a4e306402d8d826197de8c30e401;p=soc.git diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index 15642018..15bd1760 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -165,6 +165,26 @@ class TestIssuerBase(Elaboratable): def __init__(self, pspec): + # test if microwatt compatibility is to be enabled + self.microwatt_compat = (hasattr(pspec, "microwatt_compat") and + (pspec.microwatt_compat == True)) + self.alt_reset = Signal(reset_less=True) # not connected yet (microwatt) + # test if fabric compatibility is to be enabled + self.fabric_compat = (hasattr(pspec, "fabric_compat") and + (pspec.fabric_compat == True)) + + if self.microwatt_compat or self.fabric_compat: + + if hasattr(pspec, "microwatt_old"): + self.microwatt_old = pspec.microwatt_old + else: + self.microwatt_old = True # PLEASE DO NOT ALTER THIS + + if hasattr(pspec, "microwatt_debug"): + self.microwatt_debug = pspec.microwatt_debug + else: + self.microwatt_debug = True # set to False when using an FPGA + # test is SVP64 is to be enabled self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True) @@ -189,7 +209,7 @@ class TestIssuerBase(Elaboratable): #self.dbg_domain = "sync" # sigh "dbgsunc" too problematic self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock if self.jtag_en: - # XXX MUST keep this up-to-date with litex, and + # XXX MUST keep this up-to-date with fabric, and # soc-cocotb-sim, and err.. all needs sorting out, argh subset = ['uart', 'mtwi', @@ -226,6 +246,8 @@ class TestIssuerBase(Elaboratable): self.xics_icp = XICS_ICP() self.xics_ics = XICS_ICS() self.int_level_i = self.xics_ics.int_level_i + else: + self.ext_irq = Signal() # add GPIO peripheral? self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True @@ -278,17 +300,19 @@ class TestIssuerBase(Elaboratable): self.state_r_pc = staterf.r_ports['cia'] # PC rd self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd - self.state_w_msr = staterf.w_ports['msr'] # MSR wr + self.state_w_msr = staterf.w_ports['d_wr2'] # MSR wr self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr # DMI interface access intrf = self.core.regs.rf['int'] + fastrf = self.core.regs.rf['fast'] crrf = self.core.regs.rf['cr'] xerrf = self.core.regs.rf['xer'] - self.int_r = intrf.r_ports['dmi'] # INT read - self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read - self.xer_r = xerrf.r_ports['full_xer'] # XER read + self.int_r = intrf.r_ports['dmi'] # INT DMI read + self.cr_r = crrf.r_ports['full_cr_dbg'] # CR DMI read + self.xer_r = xerrf.r_ports['full_xer'] # XER DMI read + self.fast_r = fastrf.r_ports['dmi'] # FAST DMI read if self.svp64_en: # for predication @@ -298,6 +322,8 @@ class TestIssuerBase(Elaboratable): # hack method of keeping an eye on whether branch/trap set the PC self.state_nia = self.core.regs.rf['state'].w_ports['nia'] self.state_nia.wen.name = 'state_nia_wen' + # and whether SPR pipeline sets DEC or TB (fu/spr/main_stage.py) + self.state_spr = self.core.regs.rf['state'].w_ports['state1'] # pulse to synchronize the simulator at instruction end self.insn_done = Signal() @@ -310,6 +336,27 @@ class TestIssuerBase(Elaboratable): self.srcmask = Signal(64) self.dstmask = Signal(64) + # sigh, the wishbone addresses are not wishbone-compliant + # in old versions of microwatt, tplaten_3d_game is a new one + if self.microwatt_compat or self.fabric_compat: + self.ibus_adr = Signal(32, name='wishbone_insn_out.adr') + self.dbus_adr = Signal(32, name='wishbone_data_out.adr') + + # add an output of the PC and instruction, and whether it was requested + # this is for verilator debug purposes + if self.microwatt_compat or self.fabric_compat: + self.nia = Signal(64) + self.msr_o = Signal(64) + self.nia_req = Signal(1) + self.insn = Signal(32) + self.ldst_req = Signal(1) + self.ldst_addr = Signal(1) + + # for pausing dec/tb during an SPR pipeline event, this + # ensures that an SPR write (mtspr) to TB or DEC does not + # get overwritten by the DEC/TB FSM + self.pause_dec_tb = Signal() + def setup_peripherals(self, m): comb, sync = m.d.comb, m.d.sync @@ -320,11 +367,17 @@ class TestIssuerBase(Elaboratable): csd = DomainRenamer(self.core_domain) dbd = DomainRenamer(self.dbg_domain) - m.submodules.core = core = csd(self.core) + if self.microwatt_compat or self.fabric_compat: + m.submodules.core = core = self.core + else: + m.submodules.core = core = csd(self.core) + # this _so_ needs sorting out. ICache is added down inside # LoadStore1 and is already a submodule of LoadStore1 if not isinstance(self.imem, ICache): m.submodules.imem = imem = csd(self.imem) + + # set up JTAG Debug Module (in correct domain) m.submodules.dbg = dbg = dbd(self.dbg) if self.jtag_en: m.submodules.jtag = jtag = dbd(self.jtag) @@ -332,9 +385,37 @@ class TestIssuerBase(Elaboratable): # see https://bugs.libre-soc.org/show_bug.cgi?id=499 sync += dbg.dmi.connect_to(jtag.dmi) + # fixup the clocks in microwatt-compat mode (but leave resets alone + # so that microwatt soc.vhdl can pull a reset on the core or DMI + # can do it, just like in TestIssuer) + if self.microwatt_compat or self.fabric_compat: + intclk = ClockSignal(self.core_domain) + dbgclk = ClockSignal(self.dbg_domain) + if self.core_domain != 'sync': + comb += intclk.eq(ClockSignal()) + if self.dbg_domain != 'sync': + comb += dbgclk.eq(ClockSignal()) + + # if using old version of microwatt + # drop the first 3 bits of the incoming wishbone addresses + if self.microwatt_compat or self.fabric_compat: + ibus = self.imem.ibus + dbus = self.core.l0.cmpi.wb_bus() + if self.microwatt_old: + comb += self.ibus_adr.eq(Cat(Const(0, 3), ibus.adr)) + comb += self.dbus_adr.eq(Cat(Const(0, 3), dbus.adr)) + else: + comb += self.ibus_adr.eq(ibus.adr) + comb += self.dbus_adr.eq(dbus.adr) + if self.microwatt_debug: + # microwatt verilator debug purposes + pi = self.core.l0.cmpi.pi.pi + comb += self.ldst_req.eq(pi.addr_ok_o) + comb += self.ldst_addr.eq(pi.addr) + cur_state = self.cur_state - # 4x 4k SRAM blocks. these simply "exist", they get routed in litex + # 4x 4k SRAM blocks. these simply "exist", they get routed in fabric if self.sram4x4k: for i, sram in enumerate(self.sram4k): m.submodules["sram4k_%d" % i] = csd(sram) @@ -346,13 +427,15 @@ class TestIssuerBase(Elaboratable): m.submodules.xics_ics = ics = csd(self.xics_ics) comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core + else: + sync += cur_state.eint.eq(self.ext_irq) # connect externally # GPIO test peripheral if self.gpio: m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio) # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl) - # XXX causes litex ECP5 test to get wrong idea about input and output + # XXX causes fabric ECP5 test to get wrong idea about input and output # (but works with verilator sim *sigh*) # if self.gpio and self.xics: # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0]) @@ -363,10 +446,6 @@ class TestIssuerBase(Elaboratable): if self.svp64_en: m.submodules.svp64 = svp64 = csd(self.svp64) - # convenience - dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer - intrf = self.core.regs.rf['int'] - # clock delay power-on reset cd_por = ClockDomain(reset_less=True) cd_sync = ClockDomain() @@ -378,10 +457,14 @@ class TestIssuerBase(Elaboratable): dbg_sync = ClockDomain(self.dbg_domain) m.domains += dbg_sync + # create a delay, but remember it is in the power-on-reset clock domain! ti_rst = Signal(reset_less=True) delay = Signal(range(4), reset=3) + stop_delay = Signal(range(16), reset=5) with m.If(delay != 0): - m.d.por += delay.eq(delay - 1) + m.d.por += delay.eq(delay - 1) # decrement... in POR domain! + with m.If(stop_delay != 0): + m.d.por += stop_delay.eq(stop_delay - 1) # likewise comb += cd_por.clk.eq(ClockSignal()) # power-on reset delay @@ -392,6 +475,9 @@ class TestIssuerBase(Elaboratable): else: with m.If(delay != 0 | dbg.core_rst_o): comb += core_rst.eq(1) + with m.If(stop_delay != 0): + # run DMI core-stop as well but on an extra couple of cycles + comb += dbg.core_stopped_i.eq(1) # connect external reset signal to DMI Reset if self.dbg_domain != "sync": @@ -404,12 +490,13 @@ class TestIssuerBase(Elaboratable): comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i) # temporary hack: says "go" immediately for both address gen and ST + # XXX: st.go_i is set to 1 cycle delay to reduce combinatorial chains l0 = core.l0 ldst = core.fus.fus['ldst0'] st_go_edge = rising_edge(m, ldst.st.rel_o) # link addr-go direct to rel m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) - m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel + m.d.sync += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel def do_dmi(self, m, dbg): """deals with DMI debug requests @@ -420,7 +507,9 @@ class TestIssuerBase(Elaboratable): comb = m.d.comb sync = m.d.sync dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer + d_fast = dbg.d_fast intrf = self.core.regs.rf['int'] + fastrf = self.core.regs.rf['fast'] with m.If(d_reg.req): # request for regfile access being made # TODO: error-check this @@ -437,6 +526,20 @@ class TestIssuerBase(Elaboratable): comb += d_reg.data.eq(self.int_r.o_data) comb += d_reg.ack.eq(1) + # fast regfile + with m.If(d_fast.req): # request for regfile access being made + if fastrf.unary: + comb += self.fast_r.ren.eq(1 << d_fast.addr) + else: + comb += self.fast_r.addr.eq(d_fast.addr) + comb += self.fast_r.ren.eq(1) + d_fast_delay = Signal() + sync += d_fast_delay.eq(d_fast.req) + with m.If(d_fast_delay): + # data arrives one clock later + comb += d_fast.data.eq(self.fast_r.o_data) + comb += d_fast.ack.eq(1) + # sigh same thing for CR debug with m.If(d_cr.req): # request for regfile access being made comb += self.cr_r.ren.eq(0b11111111) # enable all @@ -465,47 +568,57 @@ class TestIssuerBase(Elaboratable): value to DEC, however the regfile has "passthrough" on it so this *should* be ok. - see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076 + see v3.0B p1097-1099 for Timer Resource and p1065 and p1076 """ comb, sync = m.d.comb, m.d.sync - fast_rf = self.core.regs.rf['fast'] - fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB - fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB + state_rf = self.core.regs.rf['state'] + state_r_dectb = state_rf.r_ports['issue'] # DEC/TB + state_w_dectb = state_rf.w_ports['issue'] # DEC/TB + with m.FSM() as fsm: # initiates read of current DEC with m.State("DEC_READ"): - comb += fast_r_dectb.addr.eq(FastRegs.DEC) - comb += fast_r_dectb.ren.eq(1) - m.next = "DEC_WRITE" + comb += state_r_dectb.ren.eq(1<1 loop - with m.If(~dbg.core_stop_o & ~core_rst): - comb += exec_pc_i_ready.eq(1) - # see https://bugs.libre-soc.org/show_bug.cgi?id=636 - # the exception info needs to be blatted into - # pdecode.ldst_exc, and the instruction "re-run". - # when ldst_exc.happened is set, the PowerDecoder2 - # reacts very differently: it re-writes the instruction - # with a "trap" (calls PowerDecoder2.trap()) which - # will *overwrite* whatever was requested and jump the - # PC to the exception address, as well as alter MSR. - # nothing else needs to be done other than to note - # the change of PC and MSR (and, later, SVSTATE) - with m.If(exc_happened): - mmu = core.fus.get_exc("mmu0") - ldst = core.fus.get_exc("ldst0") - if mmu is not None: - with m.If(fetch_failed): - # instruction fetch: exception is from MMU - # reset instr_fault (highest priority) - sync += pdecode2.ldst_exc.eq(mmu) - sync += pdecode2.instr_fault.eq(0) - if flush_needed: - # request icache to stop asserting "failed" - comb += core.icache.flush_in.eq(1) - with m.If(~fetch_failed): - # otherwise assume it was a LDST exception - sync += pdecode2.ldst_exc.eq(ldst) - - with m.If(exec_pc_o_valid): - - # was this the last loop iteration? - is_last = Signal() - cur_vl = cur_state.svstate.vl - comb += is_last.eq(next_srcstep == cur_vl) - - with m.If(pdecode2.instr_fault): - # reset instruction fault, try again + comb += exec_pc_i_ready.eq(1) + # see https://bugs.libre-soc.org/show_bug.cgi?id=636 + # the exception info needs to be blatted into + # pdecode.ldst_exc, and the instruction "re-run". + # when ldst_exc.happened is set, the PowerDecoder2 + # reacts very differently: it re-writes the instruction + # with a "trap" (calls PowerDecoder2.trap()) which + # will *overwrite* whatever was requested and jump the + # PC to the exception address, as well as alter MSR. + # nothing else needs to be done other than to note + # the change of PC and MSR (and, later, SVSTATE) + with m.If(exc_happened): + mmu = core.fus.get_exc("mmu0") + ldst = core.fus.get_exc("ldst0") + if mmu is not None: + with m.If(fetch_failed): + # instruction fetch: exception is from MMU + # reset instr_fault (highest priority) + sync += pdecode2.ldst_exc.eq(mmu) sync += pdecode2.instr_fault.eq(0) - m.next = "ISSUE_START" + if flush_needed: + # request icache to stop asserting "failed" + comb += core.icache.flush_in.eq(1) + with m.If(~fetch_failed): + # otherwise assume it was a LDST exception + sync += pdecode2.ldst_exc.eq(ldst) + + with m.If(exec_pc_o_valid): + + # was this the last loop iteration? + is_last = Signal() + cur_vl = cur_state.svstate.vl + comb += is_last.eq(next_srcstep == cur_vl) + + with m.If(pdecode2.instr_fault): + # reset instruction fault, try again + sync += pdecode2.instr_fault.eq(0) + m.next = "ISSUE_START" - # return directly to Decode if Execute generated an - # exception. - with m.Elif(pdecode2.ldst_exc.happened): - m.next = "DECODE_SV" + # return directly to Decode if Execute generated an + # exception. + with m.Elif(pdecode2.ldst_exc.happened): + m.next = "DECODE_SV" - # if MSR, PC or SVSTATE were changed by the previous - # instruction, go directly back to Fetch, without - # updating either MSR PC or SVSTATE - with m.Elif(self.msr_changed | self.pc_changed | - self.sv_changed): - m.next = "ISSUE_START" + # if MSR, PC or SVSTATE were changed by the previous + # instruction, go directly back to Fetch, without + # updating either MSR PC or SVSTATE + with m.Elif(self.msr_changed | self.pc_changed | + self.sv_changed): + m.next = "ISSUE_START" - # also return to Fetch, when no output was a vector - # (regardless of SRCSTEP and VL), or when the last - # instruction was really the last one of the VL loop - with m.Elif((~pdecode2.loop_continue) | is_last): - # before going back to fetch, update the PC state - # register with the NIA. - # ok here we are not reading the branch unit. - # TODO: this just blithely overwrites whatever - # pipeline updated the PC - comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) - comb += self.state_w_pc.i_data.eq(nia) - # reset SRCSTEP before returning to Fetch - if self.svp64_en: - with m.If(pdecode2.loop_continue): - comb += new_svstate.srcstep.eq(0) - comb += new_svstate.dststep.eq(0) - comb += self.update_svstate.eq(1) - else: + # also return to Fetch, when no output was a vector + # (regardless of SRCSTEP and VL), or when the last + # instruction was really the last one of the VL loop + with m.Elif((~pdecode2.loop_continue) | is_last): + # before going back to fetch, update the PC state + # register with the NIA. + # ok here we are not reading the branch unit. + # TODO: this just blithely overwrites whatever + # pipeline updated the PC + comb += self.state_w_pc.wen.eq(1 << StateRegs.PC) + comb += self.state_w_pc.i_data.eq(nia) + # reset SRCSTEP before returning to Fetch + if self.svp64_en: + with m.If(pdecode2.loop_continue): comb += new_svstate.srcstep.eq(0) comb += new_svstate.dststep.eq(0) comb += self.update_svstate.eq(1) - m.next = "ISSUE_START" - - # returning to Execute? then, first update SRCSTEP - with m.Else(): - comb += new_svstate.srcstep.eq(next_srcstep) - comb += new_svstate.dststep.eq(next_dststep) + else: + comb += new_svstate.srcstep.eq(0) + comb += new_svstate.dststep.eq(0) comb += self.update_svstate.eq(1) - # return to mask skip loop - m.next = "PRED_SKIP" - - with m.Else(): - comb += dbg.core_stopped_i.eq(1) - if flush_needed: - # request the icache to stop asserting "failed" - comb += core.icache.flush_in.eq(1) - # stop instruction fault - sync += pdecode2.instr_fault.eq(0) - # if terminated return to idle - with m.If(dbg.terminate_i): m.next = "ISSUE_START" + # returning to Execute? then, first update SRCSTEP + with m.Else(): + comb += new_svstate.srcstep.eq(next_srcstep) + comb += new_svstate.dststep.eq(next_dststep) + comb += self.update_svstate.eq(1) + # return to mask skip loop + m.next = "PRED_SKIP" + + # check if svstate needs updating: if so, write it to State Regfile with m.If(self.update_svstate): sync += cur_state.svstate.eq(self.new_svstate) # for next clock @@ -1364,6 +1505,7 @@ class TestIssuerInternal(TestIssuerBase): sync = m.d.sync dbg = self.dbg pdecode2 = self.pdecode2 + cur_state = self.cur_state # temporaries core_busy_o = core.n.o_data.busy_o # core is busy @@ -1390,14 +1532,25 @@ class TestIssuerInternal(TestIssuerBase): # instruction started: must wait till it finishes with m.State("INSN_ACTIVE"): # note changes to MSR, PC and SVSTATE - # XXX oops, really must monitor *all* State Regfile write - # ports looking for changes! with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)): sync += self.sv_changed.eq(1) with m.If(self.state_nia.wen & (1 << StateRegs.MSR)): sync += self.msr_changed.eq(1) with m.If(self.state_nia.wen & (1 << StateRegs.PC)): sync += self.pc_changed.eq(1) + # and note changes to DEC/TB, to be passed to DEC/TB FSM + with m.If(self.state_spr.wen & (1 << StateRegs.TB)): + comb += self.pause_dec_tb.eq(1) + # but also zero-out the cur_state DEC so that, on + # the next instruction, if it is "enable interrupt" + # the delay between the DEC/TB FSM reading and updating + # cur_state.dec doesn't trigger a spurious interrupt. + # the DEC/TB FSM will read the regfile and update to + # the correct value, so having cur_state.dec set to zero + # for a while is no big deal. + with m.If(self.state_spr.wen & (1 << StateRegs.DEC)): + comb += self.pause_dec_tb.eq(1) + sync += cur_state.dec.eq(0) # only needs top bit clear with m.If(~core_busy_o): # instruction done! comb += exec_pc_o_valid.eq(1) with m.If(exec_pc_i_ready): @@ -1484,21 +1637,9 @@ class TestIssuerInternal(TestIssuerBase): # Issue is where the VL for-loop # lives. the ready/valid # signalling is used to communicate between the four. - # set up Fetch FSM - fetch = FetchFSM(self.allow_overlap, self.svp64_en, - self.imem, core_rst, pdecode2, cur_state, - dbg, core, - dbg.state.svstate, # combinatorially same - nia, is_svp64_mode) - m.submodules.fetch = fetch - # connect up in/out data to existing Signals - comb += fetch.p.i_data.pc.eq(dbg.state.pc) # combinatorially same - comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same - # and the ready/valid signalling - comb += fetch_pc_o_ready.eq(fetch.p.o_ready) - comb += fetch.p.i_valid.eq(fetch_pc_i_valid) - comb += fetch_insn_o_valid.eq(fetch.n.o_valid) - comb += fetch.n.i_ready.eq(fetch_insn_i_ready) + self.fetch_fsm(m, dbg, core, core_rst, nia, is_svp64_mode, + fetch_pc_o_ready, fetch_pc_i_valid, + fetch_insn_o_valid, fetch_insn_i_ready) self.issue_fsm(m, core, nia, dbg, core_rst, is_svp64_mode, @@ -1518,15 +1659,17 @@ class TestIssuerInternal(TestIssuerBase): exec_insn_i_valid, exec_insn_o_ready, exec_pc_o_valid, exec_pc_i_ready) + # whatever was done above, over-ride it if core reset is held. + # set NIA to pc_at_reset + with m.If(core_rst): + sync += nia.eq(self.core.pc_at_reset) + return m class TestIssuer(Elaboratable): def __init__(self, pspec): self.ti = TestIssuerInternal(pspec) - # XXX TODO: make this a command-line selectable option from pspec - #from soc.simple.inorder import TestIssuerInternalInOrder - #self.ti = TestIssuerInternalInOrder(pspec) self.pll = DummyPLL(instance=True) self.dbg_rst_i = Signal(reset_less=True) @@ -1625,7 +1768,7 @@ if __name__ == '__main__': } pspec = TestMemPspec(ldst_ifacetype='bare_wb', imem_ifacetype='bare_wb', - addr_wid=48, + addr_wid=64, mask_wid=8, reg_wid=64, units=units)