src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from soc.decoder.power_decoder import create_pdecode
  25 from soc.decoder.power_decoder2 import PowerDecode2
  26 from soc.decoder.decode2execute1 import Data
  27 from soc.experiment.testmem import TestMemory # test only for instructions
  28 from soc.regfile.regfiles import StateRegs
  29 from soc.simple.core import NonProductionCore
  30 from soc.config.test.test_loadstore import TestMemPspec
  31 from soc.config.ifetch import ConfigFetchUnit
  32 from soc.decoder.power_enums import MicrOp
  33 from soc.debug.dmi import CoreDebug, DMIInterface
  34 from soc.config.state import CoreState
  35
  36 from nmutil.util import rising_edge
  37
  38
  39 class TestIssuer(Elaboratable):
  40     """TestIssuer - reads instructions from TestMemory and issues them
  41
  42     efficiency and speed is not the main goal here: functional correctness is.
  43     """
  44     def __init__(self, pspec):
  45         # main instruction core
  46         self.core = core = NonProductionCore(pspec)
  47
  48         # instruction decoder
  49         pdecode = create_pdecode()
  50         self.pdecode2 = PowerDecode2(pdecode)   # decoder
  51
  52         # Test Instruction memory
  53         self.imem = ConfigFetchUnit(pspec).fu
  54         # one-row cache of instruction read
  55         self.iline = Signal(64) # one instruction line
  56         self.iprev_adr = Signal(64) # previous address: if different, do read
  57
  58         # DMI interface
  59         self.dbg = CoreDebug()
  60
  61         # instruction go/monitor
  62         self.pc_o = Signal(64, reset_less=True)
  63         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
  64         self.core_bigendian_i = Signal()
  65         self.busy_o = Signal(reset_less=True)
  66         self.memerr_o = Signal(reset_less=True)
  67
  68         # FAST regfile read /write ports for PC and MSR
  69         staterf = self.core.regs.rf['state']
  70         self.state_r_pc = staterf.r_ports['cia'] # PC rd
  71         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
  72         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
  73
  74         # DMI interface access
  75         intrf = self.core.regs.rf['int']
  76         crrf = self.core.regs.rf['cr']
  77         self.int_r = intrf.r_ports['dmi'] # INT read
  78         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
  79
  80         # hack method of keeping an eye on whether branch/trap set the PC
  81         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
  82         self.state_nia.wen.name = 'state_nia_wen'
  83
  84     def elaborate(self, platform):
  85         m = Module()
  86         comb, sync = m.d.comb, m.d.sync
  87
  88         m.submodules.core = core = DomainRenamer("coresync")(self.core)
  89         m.submodules.imem = imem = self.imem
  90         m.submodules.dbg = dbg = self.dbg
  91
  92         # instruction decoder
  93         pdecode = create_pdecode()
  94         m.submodules.dec2 = pdecode2 = self.pdecode2
  95
  96         # convenience
  97         dmi, d_reg, d_cr = dbg.dmi, dbg.dbg_gpr, dbg.dbg_cr
  98         intrf = self.core.regs.rf['int']
  99
 100         # clock delay power-on reset
 101         cd_por  = ClockDomain(reset_less=True)
 102         cd_sync = ClockDomain()
 103         core_sync = ClockDomain("coresync")
 104         m.domains += cd_por, cd_sync, core_sync
 105
 106         delay = Signal(range(4), reset=3)
 107         with m.If(delay != 0):
 108             m.d.por += delay.eq(delay - 1)
 109         comb += cd_por.clk.eq(ClockSignal())
 110         comb += core_sync.clk.eq(ClockSignal())
 111         # power-on reset delay
 112         comb += core.core_reset_i.eq(delay != 0 | dbg.core_rst_o)
 113
 114         # busy/halted signals from core
 115         comb += self.busy_o.eq(core.busy_o)
 116         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 117
 118         # current state (MSR/PC at the moment
 119         cur_state = CoreState("cur")
 120
 121         # temporary hack: says "go" immediately for both address gen and ST
 122         l0 = core.l0
 123         ldst = core.fus.fus['ldst0']
 124         st_go_edge = rising_edge(m, ldst.st.rel_o)
 125         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
 126         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
 127
 128         # PC and instruction from I-Memory
 129         pc_changed = Signal() # note write to PC
 130         comb += self.pc_o.eq(cur_state.pc)
 131         ilatch = Signal(32)
 132
 133         # next instruction (+4 on current)
 134         nia = Signal(64, reset_less=True)
 135         comb += nia.eq(cur_state.pc + 4)
 136
 137         # read the PC
 138         pc = Signal(64, reset_less=True)
 139         pc_ok_delay = Signal()
 140         sync += pc_ok_delay.eq(~self.pc_i.ok)
 141         with m.If(self.pc_i.ok):
 142             # incoming override (start from pc_i)
 143             comb += pc.eq(self.pc_i.data)
 144         with m.Else():
 145             # otherwise read StateRegs regfile for PC...
 146             comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
 147         # ... but on a 1-clock delay
 148         with m.If(pc_ok_delay):
 149             comb += pc.eq(self.state_r_pc.data_o)
 150
 151         # don't write pc every cycle
 152         comb += self.state_w_pc.wen.eq(0)
 153         comb += self.state_w_pc.data_i.eq(0)
 154
 155         # don't read msr every cycle
 156         comb += self.state_r_msr.ren.eq(0)
 157
 158         # connect up debug signals
 159         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
 160         comb += dbg.terminate_i.eq(core.core_terminate_o)
 161         comb += dbg.state.pc.eq(pc)
 162         #comb += dbg.state.pc.eq(cur_state.pc)
 163         comb += dbg.state.msr.eq(cur_state.msr)
 164
 165         # temporaries
 166         core_busy_o = core.busy_o                 # core is busy
 167         core_ivalid_i = core.ivalid_i             # instruction is valid
 168         core_issue_i = core.issue_i               # instruction is issued
 169         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 170
 171         insn_type = core.e.do.insn_type
 172         dec_state = pdecode2.state
 173
 174         # actually use a nmigen FSM for the first time (w00t)
 175         # this FSM is perhaps unusual in that it detects conditions
 176         # then "holds" information, combinatorially, for the core
 177         # (as opposed to using sync - which would be on a clock's delay)
 178         # this includes the actual opcode, valid flags and so on.
 179         with m.FSM() as fsm:
 180
 181             # waiting (zzz)
 182             with m.State("IDLE"):
 183                 sync += pc_changed.eq(0)
 184                 sync += core.e.eq(0)
 185                 with m.If(~dbg.core_stop_o & ~core.core_reset_i):
 186                     # instruction allowed to go: start by reading the PC
 187                     # capture the PC and also drop it into Insn Memory
 188                     # we have joined a pair of combinatorial memory
 189                     # lookups together.  this is Generally Bad.
 190                     comb += self.imem.a_pc_i.eq(pc)
 191                     comb += self.imem.a_valid_i.eq(1)
 192                     comb += self.imem.f_valid_i.eq(1)
 193                     sync += cur_state.pc.eq(pc)
 194
 195                     # initiate read of MSR
 196                     comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
 197
 198                     m.next = "INSN_READ" # move to "wait for bus" phase
 199                 with m.Else():
 200                     comb += core.core_stopped_i.eq(1)
 201                     comb += dbg.core_stopped_i.eq(1)
 202
 203             # dummy pause to find out why simulation is not keeping up
 204             with m.State("INSN_READ"):
 205                 # one cycle later, msr read arrives
 206                 sync += cur_state.msr.eq(self.state_r_msr.data_o)
 207                 with m.If(self.imem.f_busy_o): # zzz...
 208                     # busy: stay in wait-read
 209                     comb += self.imem.a_valid_i.eq(1)
 210                     comb += self.imem.f_valid_i.eq(1)
 211                 with m.Else():
 212                     # not busy: instruction fetched
 213                     f_instr_o = self.imem.f_instr_o
 214                     if f_instr_o.width == 32:
 215                         insn = f_instr_o
 216                     else:
 217                         insn = f_instr_o.word_select(cur_state.pc[2], 32)
 218                     comb += dec_opcode_i.eq(insn) # actual opcode
 219                     comb += dec_state.eq(cur_state)
 220                     sync += core.e.eq(pdecode2.e)
 221                     sync += ilatch.eq(insn) # latch current insn
 222                     # also drop PC and MSR into decode "state"
 223                     m.next = "INSN_START" # move to "start"
 224
 225             # waiting for instruction bus (stays there until not busy)
 226             with m.State("INSN_START"):
 227                 comb += core_ivalid_i.eq(1) # instruction is valid
 228                 comb += core_issue_i.eq(1)  # and issued
 229
 230
 231                 m.next = "INSN_ACTIVE" # move to "wait completion"
 232
 233             # instruction started: must wait till it finishes
 234             with m.State("INSN_ACTIVE"):
 235                 with m.If(insn_type != MicrOp.OP_NOP):
 236                     comb += core_ivalid_i.eq(1) # instruction is valid
 237                 with m.If(self.state_nia.wen):
 238                     sync += pc_changed.eq(1)
 239                 with m.If(~core_busy_o): # instruction done!
 240                     # ok here we are not reading the branch unit.  TODO
 241                     # this just blithely overwrites whatever pipeline
 242                     # updated the PC
 243                     with m.If(~pc_changed):
 244                         comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
 245                         comb += self.state_w_pc.data_i.eq(nia)
 246                     sync += core.e.eq(0)
 247                     m.next = "IDLE" # back to idle
 248
 249         # this bit doesn't have to be in the FSM: connect up to read
 250         # regfiles on demand from DMI
 251         with m.If(d_reg.req): # request for regfile access being made
 252             # TODO: error-check this
 253             # XXX should this be combinatorial?  sync better?
 254             if intrf.unary:
 255                 comb += self.int_r.ren.eq(1<<d_reg.addr)
 256             else:
 257                 comb += self.int_r.addr.eq(d_reg.addr)
 258                 comb += self.int_r.ren.eq(1)
 259         d_reg_delay  = Signal()
 260         sync += d_reg_delay.eq(d_reg.req)
 261         with m.If(d_reg_delay):
 262             # data arrives one clock later
 263             comb += d_reg.data.eq(self.int_r.data_o)
 264             comb += d_reg.ack.eq(1)
 265
 266         # sigh same thing for CR debug
 267         with m.If(d_cr.req): # request for regfile access being made
 268             comb += self.cr_r.ren.eq(0b11111111) # enable all
 269         d_cr_delay  = Signal()
 270         sync += d_cr_delay.eq(d_cr.req)
 271         with m.If(d_cr_delay):
 272             # data arrives one clock later
 273             comb += d_cr.data.eq(self.cr_r.data_o)
 274             comb += d_cr.ack.eq(1)
 275
 276         return m
 277
 278     def __iter__(self):
 279         yield from self.pc_i.ports()
 280         yield self.pc_o
 281         yield self.memerr_o
 282         yield from self.core.ports()
 283         yield from self.imem.ports()
 284         yield self.core_bigendian_i
 285         yield self.busy_o
 286
 287     def ports(self):
 288         return list(self)
 289
 290     def external_ports(self):
 291         return self.pc_i.ports() + [self.pc_o,
 292                                     self.memerr_o,
 293                                     self.core_bigendian_i,
 294                                     ClockSignal(),
 295                                     ResetSignal(),
 296                                     self.busy_o,
 297                                     ] + \
 298                 list(self.dbg.dmi.ports()) + \
 299                 list(self.imem.ibus.fields.values()) + \
 300                 list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())
 301
 302     def ports(self):
 303         return list(self)
 304
 305
 306 if __name__ == '__main__':
 307     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
 308              'spr': 1,
 309              'div': 1,
 310              'mul': 1,
 311              'shiftrot': 1
 312             }
 313     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
 314                          imem_ifacetype='bare_wb',
 315                          addr_wid=48,
 316                          mask_wid=8,
 317                          reg_wid=64,
 318                          units=units)
 319     dut = TestIssuer(pspec)
 320     vl = main(dut, ports=dut.ports(), name="test_issuer")
 321
 322     if len(sys.argv) == 1:
 323         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
 324         with open("test_issuer.il", "w") as f:
 325             f.write(vl)