src/soc/simple/inorder.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at least provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal,
  19                     Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from openpower.consts import MSR
  28 from openpower.decoder.power_enums import MicrOp
  29 from openpower.state import CoreState
  30 from soc.regfile.regfiles import StateRegs
  31 from soc.config.test.test_loadstore import TestMemPspec
  32 from soc.experiment.icache import ICache
  33
  34 from nmutil.util import rising_edge
  35
  36 from soc.simple.issuer import TestIssuerBase
  37
  38 def get_insn(f_instr_o, pc):
  39     if f_instr_o.width == 32:
  40         return f_instr_o
  41     else:
  42         # 64-bit: bit 2 of pc decides which word to select
  43         return f_instr_o.word_select(pc[2], 32)
  44
  45
  46 # Fetch Finite State Machine.
  47 # WARNING: there are currently DriverConflicts but it's actually working.
  48 # TODO, here: everything that is global in nature, information from the
  49 # main TestIssuerInternal, needs to move to either ispec() or ospec().
  50 # not only that: TestIssuerInternal.imem can entirely move into here
  51 # because imem is only ever accessed inside the FetchFSM.
  52 class FetchFSM(ControlBase):
  53     def __init__(self, allow_overlap, imem, core_rst,
  54                  pdecode2, cur_state,
  55                  dbg, core, svstate, nia):
  56         self.allow_overlap = allow_overlap
  57         self.imem = imem
  58         self.core_rst = core_rst
  59         self.pdecode2 = pdecode2
  60         self.cur_state = cur_state
  61         self.dbg = dbg
  62         self.core = core
  63         self.svstate = svstate
  64         self.nia = nia
  65
  66         # set up pipeline ControlBase and allocate i/o specs
  67         # (unusual: normally done by the Pipeline API)
  68         super().__init__(stage=self)
  69         self.p.i_data, self.n.o_data = self.new_specs(None)
  70         self.i, self.o = self.p.i_data, self.n.o_data
  71
  72     # next 3 functions are Stage API Compliance
  73     def setup(self, m, i):
  74         pass
  75
  76     def ispec(self):
  77         return FetchInput()
  78
  79     def ospec(self):
  80         return FetchOutput()
  81
  82     def elaborate(self, platform):
  83         """fetch FSM
  84
  85         this FSM performs fetch of raw instruction data, partial-decodes
  86         it 32-bit at a time to detect SVP64 prefixes, and will optionally
  87         read a 2nd 32-bit quantity if that occurs.
  88         """
  89         m = super().elaborate(platform)
  90
  91         dbg = self.dbg
  92         core = self.core
  93         pc = self.i.pc
  94         msr = self.i.msr
  95         svstate = self.svstate
  96         nia = self.nia
  97         fetch_pc_o_ready = self.p.o_ready
  98         fetch_pc_i_valid = self.p.i_valid
  99         fetch_insn_o_valid = self.n.o_valid
 100         fetch_insn_i_ready = self.n.i_ready
 101
 102         comb = m.d.comb
 103         sync = m.d.sync
 104         pdecode2 = self.pdecode2
 105         cur_state = self.cur_state
 106         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 107
 108         # also note instruction fetch failed
 109         if hasattr(core, "icache"):
 110             fetch_failed = core.icache.i_out.fetch_failed
 111             flush_needed = True
 112         else:
 113             fetch_failed = Const(0, 1)
 114             flush_needed = False
 115
 116         # set priv / virt mode on I-Cache, sigh
 117         if isinstance(self.imem, ICache):
 118             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 119             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 120
 121         with m.FSM(name='fetch_fsm'):
 122
 123             # waiting (zzz)
 124             with m.State("IDLE"):
 125                 with m.If(~dbg.stopping_o & ~fetch_failed):
 126                     comb += fetch_pc_o_ready.eq(1)
 127                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 128                     # instruction allowed to go: start by reading the PC
 129                     # capture the PC and also drop it into Insn Memory
 130                     # we have joined a pair of combinatorial memory
 131                     # lookups together.  this is Generally Bad.
 132                     comb += self.imem.a_pc_i.eq(pc)
 133                     comb += self.imem.a_i_valid.eq(1)
 134                     comb += self.imem.f_i_valid.eq(1)
 135                     sync += cur_state.pc.eq(pc)
 136                     sync += cur_state.svstate.eq(svstate)  # and svstate
 137                     sync += cur_state.msr.eq(msr)  # and msr
 138
 139                     m.next = "INSN_READ"  # move to "wait for bus" phase
 140
 141             # dummy pause to find out why simulation is not keeping up
 142             with m.State("INSN_READ"):
 143                 if self.allow_overlap:
 144                     stopping = dbg.stopping_o
 145                 else:
 146                     stopping = Const(0)
 147                 with m.If(stopping):
 148                     # stopping: jump back to idle
 149                     m.next = "IDLE"
 150                 with m.Else():
 151                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 152                         # busy but not fetch failed: stay in wait-read
 153                         comb += self.imem.a_i_valid.eq(1)
 154                         comb += self.imem.f_i_valid.eq(1)
 155                     with m.Else():
 156                         # not busy (or fetch failed!): instruction fetched
 157                         # when fetch failed, the instruction gets ignored
 158                         # by the decoder
 159                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 160                         # not SVP64 - 32-bit only
 161                         sync += nia.eq(cur_state.pc + 4)
 162                         sync += dec_opcode_o.eq(insn)
 163                             m.next = "INSN_READY"
 164
 165             with m.State("INSN_READY"):
 166                 # hand over the instruction, to be decoded
 167                 comb += fetch_insn_o_valid.eq(1)
 168                 with m.If(fetch_insn_i_ready):
 169                     m.next = "IDLE"
 170
 171         # whatever was done above, over-ride it if core reset is held
 172         with m.If(self.core_rst):
 173             sync += nia.eq(0)
 174
 175         return m
 176
 177
 178 class TestIssuerInternalInOrder(TestIssuerBase):
 179     """TestIssuer - reads instructions from TestMemory and issues them
 180
 181     efficiency and speed is not the main goal here: functional correctness
 182     and code clarity is.  optimisations (which almost 100% interfere with
 183     easy understanding) come later.
 184     """
 185
 186     def issue_fsm(self, m, core, nia,
 187                   dbg, core_rst,
 188                   fetch_pc_o_ready, fetch_pc_i_valid,
 189                   fetch_insn_o_valid, fetch_insn_i_ready,
 190                   exec_insn_i_valid, exec_insn_o_ready,
 191                   exec_pc_o_valid, exec_pc_i_ready):
 192         """issue FSM
 193
 194         decode / issue FSM.  this interacts with the "fetch" FSM
 195         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 196         (outgoing). also interacts with the "execute" FSM
 197         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 198         (incoming).
 199         SVP64 RM prefixes have already been set up by the
 200         "fetch" phase, so execute is fairly straightforward.
 201         """
 202
 203         comb = m.d.comb
 204         sync = m.d.sync
 205         pdecode2 = self.pdecode2
 206         cur_state = self.cur_state
 207
 208         # temporaries
 209         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 210
 211         # note if an exception happened.  in a pipelined or OoO design
 212         # this needs to be accompanied by "shadowing" (or stalling)
 213         exc_happened = self.core.o.exc_happened
 214         # also note instruction fetch failed
 215         if hasattr(core, "icache"):
 216             fetch_failed = core.icache.i_out.fetch_failed
 217             flush_needed = True
 218             # set to fault in decoder
 219             # update (highest priority) instruction fault
 220             rising_fetch_failed = rising_edge(m, fetch_failed)
 221             with m.If(rising_fetch_failed):
 222                 sync += pdecode2.instr_fault.eq(1)
 223         else:
 224             fetch_failed = Const(0, 1)
 225             flush_needed = False
 226
 227         with m.FSM(name="issue_fsm"):
 228
 229             # sync with the "fetch" phase which is reading the instruction
 230             # at this point, there is no instruction running, that
 231             # could inadvertently update the PC.
 232             with m.State("ISSUE_START"):
 233                 # reset instruction fault
 234                 sync += pdecode2.instr_fault.eq(0)
 235                 # wait on "core stop" release, before next fetch
 236                 # need to do this here, in case we are in a VL==0 loop
 237                 with m.If(~dbg.core_stop_o & ~core_rst):
 238                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 239                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 240                         m.next = "INSN_WAIT"
 241                 with m.Else():
 242                     # tell core it's stopped, and acknowledge debug handshake
 243                     comb += dbg.core_stopped_i.eq(1)
 244
 245             # wait for an instruction to arrive from Fetch
 246             with m.State("INSN_WAIT"):
 247                 if self.allow_overlap:
 248                     stopping = dbg.stopping_o
 249                 else:
 250                     stopping = Const(0)
 251                 with m.If(stopping):
 252                     # stopping: jump back to idle
 253                     m.next = "ISSUE_START"
 254                     if flush_needed:
 255                         # request the icache to stop asserting "failed"
 256                         comb += core.icache.flush_in.eq(1)
 257                     # stop instruction fault
 258                     sync += pdecode2.instr_fault.eq(0)
 259                 with m.Else():
 260                     comb += fetch_insn_i_ready.eq(1)
 261                     with m.If(fetch_insn_o_valid):
 262                         # loop into ISSUE_START if it's a SVP64 instruction
 263                         # and VL == 0.  this because VL==0 is a for-loop
 264                         # from 0 to 0 i.e. always, always a NOP.
 265                         m.next = "DECODE_SV"  # skip predication
 266
 267             # after src/dst step have been updated, we are ready
 268             # to decode the instruction
 269             with m.State("DECODE_SV"):
 270                 # decode the instruction
 271                 with m.If(~fetch_failed):
 272                     sync += pdecode2.instr_fault.eq(0)
 273                 sync += core.i.e.eq(pdecode2.e)
 274                 sync += core.i.state.eq(cur_state)
 275                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 276                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 277                 # after decoding, reset any previous exception condition,
 278                 # allowing it to be set again during the next execution
 279                 sync += pdecode2.ldst_exc.eq(0)
 280
 281                 m.next = "INSN_EXECUTE"  # move to "execute"
 282
 283             # handshake with execution FSM, move to "wait" once acknowledged
 284             with m.State("INSN_EXECUTE"):
 285                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 286                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 287                     m.next = "EXECUTE_WAIT"
 288
 289             with m.State("EXECUTE_WAIT"):
 290                 # wait on "core stop" release, at instruction end
 291                 # need to do this here, in case we are in a VL>1 loop
 292                 with m.If(~dbg.core_stop_o & ~core_rst):
 293                     comb += exec_pc_i_ready.eq(1)
 294                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 295                     # the exception info needs to be blatted into
 296                     # pdecode.ldst_exc, and the instruction "re-run".
 297                     # when ldst_exc.happened is set, the PowerDecoder2
 298                     # reacts very differently: it re-writes the instruction
 299                     # with a "trap" (calls PowerDecoder2.trap()) which
 300                     # will *overwrite* whatever was requested and jump the
 301                     # PC to the exception address, as well as alter MSR.
 302                     # nothing else needs to be done other than to note
 303                     # the change of PC and MSR (and, later, SVSTATE)
 304                     with m.If(exc_happened):
 305                         mmu = core.fus.get_exc("mmu0")
 306                         ldst = core.fus.get_exc("ldst0")
 307                         if mmu is not None:
 308                             with m.If(fetch_failed):
 309                                 # instruction fetch: exception is from MMU
 310                                 # reset instr_fault (highest priority)
 311                                 sync += pdecode2.ldst_exc.eq(mmu)
 312                                 sync += pdecode2.instr_fault.eq(0)
 313                                 if flush_needed:
 314                                     # request icache to stop asserting "failed"
 315                                     comb += core.icache.flush_in.eq(1)
 316                         with m.If(~fetch_failed):
 317                             # otherwise assume it was a LDST exception
 318                             sync += pdecode2.ldst_exc.eq(ldst)
 319
 320                     with m.If(exec_pc_o_valid):
 321
 322                         # return directly to Decode if Execute generated an
 323                         # exception.
 324                         with m.If(pdecode2.ldst_exc.happened):
 325                             m.next = "DECODE_SV"
 326
 327                         # if MSR, PC or SVSTATE were changed by the previous
 328                         # instruction, go directly back to Fetch, without
 329                         # updating either MSR PC or SVSTATE
 330                         with m.Elif(self.msr_changed | self.pc_changed |
 331                                     self.sv_changed):
 332                             m.next = "ISSUE_START"
 333
 334                         # returning to Execute? then, first update SRCSTEP
 335                         with m.Else():
 336                             # return to mask skip loop
 337                             m.next = "DECODE_SV"
 338
 339                 with m.Else():
 340                     comb += dbg.core_stopped_i.eq(1)
 341                     if flush_needed:
 342                         # request the icache to stop asserting "failed"
 343                         comb += core.icache.flush_in.eq(1)
 344                     # stop instruction fault
 345                     sync += pdecode2.instr_fault.eq(0)
 346                     if flush_needed:
 347                         # request the icache to stop asserting "failed"
 348                         comb += core.icache.flush_in.eq(1)
 349                     # stop instruction fault
 350                     sync += pdecode2.instr_fault.eq(0)
 351
 352     def execute_fsm(self, m, core,
 353                     exec_insn_i_valid, exec_insn_o_ready,
 354                     exec_pc_o_valid, exec_pc_i_ready):
 355         """execute FSM
 356
 357         execute FSM. this interacts with the "issue" FSM
 358         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 359         (outgoing). SVP64 RM prefixes have already been set up by the
 360         "issue" phase, so execute is fairly straightforward.
 361         """
 362
 363         comb = m.d.comb
 364         sync = m.d.sync
 365         pdecode2 = self.pdecode2
 366
 367         # temporaries
 368         core_busy_o = core.n.o_data.busy_o  # core is busy
 369         core_ivalid_i = core.p.i_valid              # instruction is valid
 370
 371         if hasattr(core, "icache"):
 372             fetch_failed = core.icache.i_out.fetch_failed
 373         else:
 374             fetch_failed = Const(0, 1)
 375
 376         with m.FSM(name="exec_fsm"):
 377
 378             # waiting for instruction bus (stays there until not busy)
 379             with m.State("INSN_START"):
 380                 comb += exec_insn_o_ready.eq(1)
 381                 with m.If(exec_insn_i_valid):
 382                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 383                     sync += self.sv_changed.eq(0)
 384                     sync += self.pc_changed.eq(0)
 385                     sync += self.msr_changed.eq(0)
 386                     with m.If(core.p.o_ready):  # only move if accepted
 387                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 388
 389             # instruction started: must wait till it finishes
 390             with m.State("INSN_ACTIVE"):
 391                 # note changes to MSR, PC and SVSTATE
 392                 # XXX oops, really must monitor *all* State Regfile write
 393                 # ports looking for changes!
 394                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
 395                     sync += self.sv_changed.eq(1)
 396                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
 397                     sync += self.msr_changed.eq(1)
 398                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
 399                     sync += self.pc_changed.eq(1)
 400                 with m.If(~core_busy_o):  # instruction done!
 401                     comb += exec_pc_o_valid.eq(1)
 402                     with m.If(exec_pc_i_ready):
 403                         # when finished, indicate "done".
 404                         # however, if there was an exception, the instruction
 405                         # is *not* yet done.  this is an implementation
 406                         # detail: we choose to implement exceptions by
 407                         # taking the exception information from the LDST
 408                         # unit, putting that *back* into the PowerDecoder2,
 409                         # and *re-running the entire instruction*.
 410                         # if we erroneously indicate "done" here, it is as if
 411                         # there were *TWO* instructions:
 412                         # 1) the failed LDST 2) a TRAP.
 413                         with m.If(~pdecode2.ldst_exc.happened &
 414                                   ~fetch_failed):
 415                             comb += self.insn_done.eq(1)
 416                         m.next = "INSN_START"  # back to fetch
 417
 418     def elaborate(self, platform):
 419         m = super().elaborate(platform)
 420         # convenience
 421         comb, sync = m.d.comb, m.d.sync
 422         cur_state = self.cur_state
 423         pdecode2 = self.pdecode2
 424         dbg = self.dbg
 425         core = self.core
 426
 427         # set up peripherals and core
 428         core_rst = self.core_rst
 429
 430         # indicate to outside world if any FU is still executing
 431         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
 432
 433         # address of the next instruction, in the absence of a branch
 434         # depends on the instruction size
 435         nia = Signal(64)
 436
 437         # connect up debug signals
 438         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
 439
 440         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
 441         # issue, decode/execute, now joined by "Predicate fetch/calculate".
 442         # these are the handshake signals between each
 443
 444         # fetch FSM can run as soon as the PC is valid
 445         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
 446         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
 447
 448         # fetch FSM hands over the instruction to be decoded / issued
 449         fetch_insn_o_valid = Signal()
 450         fetch_insn_i_ready = Signal()
 451
 452         # issue FSM delivers the instruction to the be executed
 453         exec_insn_i_valid = Signal()
 454         exec_insn_o_ready = Signal()
 455
 456         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
 457         exec_pc_o_valid = Signal()
 458         exec_pc_i_ready = Signal()
 459
 460         # the FSMs here are perhaps unusual in that they detect conditions
 461         # then "hold" information, combinatorially, for the core
 462         # (as opposed to using sync - which would be on a clock's delay)
 463         # this includes the actual opcode, valid flags and so on.
 464
 465         # Fetch, then predicate fetch, then Issue, then Execute.
 466         # Issue is where the VL for-loop # lives.  the ready/valid
 467         # signalling is used to communicate between the four.
 468
 469         # set up Fetch FSM
 470         fetch = FetchFSM(self.allow_overlap,
 471                          self.imem, core_rst, pdecode2, cur_state,
 472                          dbg, core,
 473                          dbg.state.svstate, # combinatorially same
 474                          nia)
 475         m.submodules.fetch = fetch
 476         # connect up in/out data to existing Signals
 477         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
 478         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
 479         # and the ready/valid signalling
 480         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
 481         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
 482         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
 483         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
 484
 485         self.issue_fsm(m, core, nia,
 486                        dbg, core_rst,
 487                        fetch_pc_o_ready, fetch_pc_i_valid,
 488                        fetch_insn_o_valid, fetch_insn_i_ready,
 489                        exec_insn_i_valid, exec_insn_o_ready,
 490                        exec_pc_o_valid, exec_pc_i_ready)
 491
 492         self.execute_fsm(m, core,
 493                          exec_insn_i_valid, exec_insn_o_ready,
 494                          exec_pc_o_valid, exec_pc_i_ready)
 495
 496         return m
 497
 498
 499 # XXX TODO: update this
 500
 501 if __name__ == '__main__':
 502     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
 503              'spr': 1,
 504              'div': 1,
 505              'mul': 1,
 506              'shiftrot': 1
 507              }
 508     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
 509                          imem_ifacetype='bare_wb',
 510                          addr_wid=48,
 511                          mask_wid=8,
 512                          reg_wid=64,
 513                          units=units)
 514     dut = TestIssuer(pspec)
 515     vl = main(dut, ports=dut.ports(), name="test_issuer")
 516
 517     if len(sys.argv) == 1:
 518         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
 519         with open("test_issuer.il", "w") as f:
 520             f.write(vl)