src/soc/simple/inorder.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at least provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal,
  19                     Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from openpower.decoder.power_enums import MicrOp
  28 from openpower.state import CoreState
  29 from soc.regfile.regfiles import StateRegs
  30 from soc.config.test.test_loadstore import TestMemPspec
  31 from soc.experiment.icache import ICache
  32
  33 from nmutil.util import rising_edge
  34
  35 from soc.simple.issuer import TestIssuerBase
  36
  37 def get_insn(f_instr_o, pc):
  38     if f_instr_o.width == 32:
  39         return f_instr_o
  40     else:
  41         # 64-bit: bit 2 of pc decides which word to select
  42         return f_instr_o.word_select(pc[2], 32)
  43
  44
  45 # Fetch Finite State Machine.
  46 # WARNING: there are currently DriverConflicts but it's actually working.
  47 # TODO, here: everything that is global in nature, information from the
  48 # main TestIssuerInternal, needs to move to either ispec() or ospec().
  49 # not only that: TestIssuerInternal.imem can entirely move into here
  50 # because imem is only ever accessed inside the FetchFSM.
  51 class FetchFSM(ControlBase):
  52     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
  53                  pdecode2, cur_state,
  54                  dbg, core, svstate, nia, is_svp64_mode):
  55         self.allow_overlap = allow_overlap
  56         self.svp64_en = svp64_en
  57         self.imem = imem
  58         self.core_rst = core_rst
  59         self.pdecode2 = pdecode2
  60         self.cur_state = cur_state
  61         self.dbg = dbg
  62         self.core = core
  63         self.svstate = svstate
  64         self.nia = nia
  65         self.is_svp64_mode = is_svp64_mode
  66
  67         # set up pipeline ControlBase and allocate i/o specs
  68         # (unusual: normally done by the Pipeline API)
  69         super().__init__(stage=self)
  70         self.p.i_data, self.n.o_data = self.new_specs(None)
  71         self.i, self.o = self.p.i_data, self.n.o_data
  72
  73     # next 3 functions are Stage API Compliance
  74     def setup(self, m, i):
  75         pass
  76
  77     def ispec(self):
  78         return FetchInput()
  79
  80     def ospec(self):
  81         return FetchOutput()
  82
  83     def elaborate(self, platform):
  84         """fetch FSM
  85
  86         this FSM performs fetch of raw instruction data, partial-decodes
  87         it 32-bit at a time to detect SVP64 prefixes, and will optionally
  88         read a 2nd 32-bit quantity if that occurs.
  89         """
  90         m = super().elaborate(platform)
  91
  92         dbg = self.dbg
  93         core = self.core
  94         pc = self.i.pc
  95         msr = self.i.msr
  96         svstate = self.svstate
  97         nia = self.nia
  98         is_svp64_mode = self.is_svp64_mode
  99         fetch_pc_o_ready = self.p.o_ready
 100         fetch_pc_i_valid = self.p.i_valid
 101         fetch_insn_o_valid = self.n.o_valid
 102         fetch_insn_i_ready = self.n.i_ready
 103
 104         comb = m.d.comb
 105         sync = m.d.sync
 106         pdecode2 = self.pdecode2
 107         cur_state = self.cur_state
 108         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 109
 110         # also note instruction fetch failed
 111         if hasattr(core, "icache"):
 112             fetch_failed = core.icache.i_out.fetch_failed
 113             flush_needed = True
 114         else:
 115             fetch_failed = Const(0, 1)
 116             flush_needed = False
 117
 118         with m.FSM(name='fetch_fsm'):
 119
 120             # waiting (zzz)
 121             with m.State("IDLE"):
 122                 with m.If(~dbg.stopping_o & ~fetch_failed):
 123                     comb += fetch_pc_o_ready.eq(1)
 124                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 125                     # instruction allowed to go: start by reading the PC
 126                     # capture the PC and also drop it into Insn Memory
 127                     # we have joined a pair of combinatorial memory
 128                     # lookups together.  this is Generally Bad.
 129                     comb += self.imem.a_pc_i.eq(pc)
 130                     comb += self.imem.a_i_valid.eq(1)
 131                     comb += self.imem.f_i_valid.eq(1)
 132                     sync += cur_state.pc.eq(pc)
 133                     sync += cur_state.svstate.eq(svstate)  # and svstate
 134                     sync += cur_state.msr.eq(msr)  # and msr
 135
 136                     m.next = "INSN_READ"  # move to "wait for bus" phase
 137
 138             # dummy pause to find out why simulation is not keeping up
 139             with m.State("INSN_READ"):
 140                 if self.allow_overlap:
 141                     stopping = dbg.stopping_o
 142                 else:
 143                     stopping = Const(0)
 144                 with m.If(stopping):
 145                     # stopping: jump back to idle
 146                     m.next = "IDLE"
 147                 with m.Else():
 148                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 149                         # busy but not fetch failed: stay in wait-read
 150                         comb += self.imem.a_i_valid.eq(1)
 151                         comb += self.imem.f_i_valid.eq(1)
 152                     with m.Else():
 153                         # not busy (or fetch failed!): instruction fetched
 154                         # when fetch failed, the instruction gets ignored
 155                         # by the decoder
 156                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 157                         if self.svp64_en:
 158                             svp64 = self.svp64
 159                             # decode the SVP64 prefix, if any
 160                             comb += svp64.raw_opcode_in.eq(insn)
 161                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 162                             # pass the decoded prefix (if any) to PowerDecoder2
 163                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 164                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 165                             # remember whether this is a prefixed instruction,
 166                             # so the FSM can readily loop when VL==0
 167                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 168                             # calculate the address of the following instruction
 169                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 170                             sync += nia.eq(cur_state.pc + insn_size)
 171                             with m.If(~svp64.is_svp64_mode):
 172                                 # with no prefix, store the instruction
 173                                 # and hand it directly to the next FSM
 174                                 sync += dec_opcode_o.eq(insn)
 175                                 m.next = "INSN_READY"
 176                             with m.Else():
 177                                 # fetch the rest of the instruction from memory
 178                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 179                                 comb += self.imem.a_i_valid.eq(1)
 180                                 comb += self.imem.f_i_valid.eq(1)
 181                                 m.next = "INSN_READ2"
 182                         else:
 183                             # not SVP64 - 32-bit only
 184                             sync += nia.eq(cur_state.pc + 4)
 185                             sync += dec_opcode_o.eq(insn)
 186                             m.next = "INSN_READY"
 187
 188             with m.State("INSN_READ2"):
 189                 with m.If(self.imem.f_busy_o):  # zzz...
 190                     # busy: stay in wait-read
 191                     comb += self.imem.a_i_valid.eq(1)
 192                     comb += self.imem.f_i_valid.eq(1)
 193                 with m.Else():
 194                     # not busy: instruction fetched
 195                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 196                     sync += dec_opcode_o.eq(insn)
 197                     m.next = "INSN_READY"
 198
 199             with m.State("INSN_READY"):
 200                 # hand over the instruction, to be decoded
 201                 comb += fetch_insn_o_valid.eq(1)
 202                 with m.If(fetch_insn_i_ready):
 203                     m.next = "IDLE"
 204
 205         # whatever was done above, over-ride it if core reset is held
 206         with m.If(self.core_rst):
 207             sync += nia.eq(0)
 208
 209         return m
 210
 211
 212 class TestIssuerInternalInOrder(TestIssuerBase):
 213     """TestIssuer - reads instructions from TestMemory and issues them
 214
 215     efficiency and speed is not the main goal here: functional correctness
 216     and code clarity is.  optimisations (which almost 100% interfere with
 217     easy understanding) come later.
 218     """
 219
 220     def issue_fsm(self, m, core, nia,
 221                   dbg, core_rst, is_svp64_mode,
 222                   fetch_pc_o_ready, fetch_pc_i_valid,
 223                   fetch_insn_o_valid, fetch_insn_i_ready,
 224                   exec_insn_i_valid, exec_insn_o_ready,
 225                   exec_pc_o_valid, exec_pc_i_ready):
 226         """issue FSM
 227
 228         decode / issue FSM.  this interacts with the "fetch" FSM
 229         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 230         (outgoing). also interacts with the "execute" FSM
 231         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 232         (incoming).
 233         SVP64 RM prefixes have already been set up by the
 234         "fetch" phase, so execute is fairly straightforward.
 235         """
 236
 237         comb = m.d.comb
 238         sync = m.d.sync
 239         pdecode2 = self.pdecode2
 240         cur_state = self.cur_state
 241
 242         # temporaries
 243         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 244
 245         # note if an exception happened.  in a pipelined or OoO design
 246         # this needs to be accompanied by "shadowing" (or stalling)
 247         exc_happened = self.core.o.exc_happened
 248         # also note instruction fetch failed
 249         if hasattr(core, "icache"):
 250             fetch_failed = core.icache.i_out.fetch_failed
 251             flush_needed = True
 252             # set to fault in decoder
 253             # update (highest priority) instruction fault
 254             rising_fetch_failed = rising_edge(m, fetch_failed)
 255             with m.If(rising_fetch_failed):
 256                 sync += pdecode2.instr_fault.eq(1)
 257         else:
 258             fetch_failed = Const(0, 1)
 259             flush_needed = False
 260
 261         with m.FSM(name="issue_fsm"):
 262
 263             # sync with the "fetch" phase which is reading the instruction
 264             # at this point, there is no instruction running, that
 265             # could inadvertently update the PC.
 266             with m.State("ISSUE_START"):
 267                 # reset instruction fault
 268                 sync += pdecode2.instr_fault.eq(0)
 269                 # wait on "core stop" release, before next fetch
 270                 # need to do this here, in case we are in a VL==0 loop
 271                 with m.If(~dbg.core_stop_o & ~core_rst):
 272                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 273                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 274                         m.next = "INSN_WAIT"
 275                 with m.Else():
 276                     # tell core it's stopped, and acknowledge debug handshake
 277                     comb += dbg.core_stopped_i.eq(1)
 278
 279             # wait for an instruction to arrive from Fetch
 280             with m.State("INSN_WAIT"):
 281                 if self.allow_overlap:
 282                     stopping = dbg.stopping_o
 283                 else:
 284                     stopping = Const(0)
 285                 with m.If(stopping):
 286                     # stopping: jump back to idle
 287                     m.next = "ISSUE_START"
 288                     if flush_needed:
 289                         # request the icache to stop asserting "failed"
 290                         comb += core.icache.flush_in.eq(1)
 291                     # stop instruction fault
 292                     sync += pdecode2.instr_fault.eq(0)
 293                 with m.Else():
 294                     comb += fetch_insn_i_ready.eq(1)
 295                     with m.If(fetch_insn_o_valid):
 296                         # loop into ISSUE_START if it's a SVP64 instruction
 297                         # and VL == 0.  this because VL==0 is a for-loop
 298                         # from 0 to 0 i.e. always, always a NOP.
 299                         m.next = "DECODE_SV"  # skip predication
 300
 301             # after src/dst step have been updated, we are ready
 302             # to decode the instruction
 303             with m.State("DECODE_SV"):
 304                 # decode the instruction
 305                 with m.If(~fetch_failed):
 306                     sync += pdecode2.instr_fault.eq(0)
 307                 sync += core.i.e.eq(pdecode2.e)
 308                 sync += core.i.state.eq(cur_state)
 309                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 310                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 311                 # after decoding, reset any previous exception condition,
 312                 # allowing it to be set again during the next execution
 313                 sync += pdecode2.ldst_exc.eq(0)
 314
 315                 m.next = "INSN_EXECUTE"  # move to "execute"
 316
 317             # handshake with execution FSM, move to "wait" once acknowledged
 318             with m.State("INSN_EXECUTE"):
 319                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 320                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 321                     m.next = "EXECUTE_WAIT"
 322
 323             with m.State("EXECUTE_WAIT"):
 324                 # wait on "core stop" release, at instruction end
 325                 # need to do this here, in case we are in a VL>1 loop
 326                 with m.If(~dbg.core_stop_o & ~core_rst):
 327                     comb += exec_pc_i_ready.eq(1)
 328                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 329                     # the exception info needs to be blatted into
 330                     # pdecode.ldst_exc, and the instruction "re-run".
 331                     # when ldst_exc.happened is set, the PowerDecoder2
 332                     # reacts very differently: it re-writes the instruction
 333                     # with a "trap" (calls PowerDecoder2.trap()) which
 334                     # will *overwrite* whatever was requested and jump the
 335                     # PC to the exception address, as well as alter MSR.
 336                     # nothing else needs to be done other than to note
 337                     # the change of PC and MSR (and, later, SVSTATE)
 338                     with m.If(exc_happened):
 339                         mmu = core.fus.get_exc("mmu0")
 340                         ldst = core.fus.get_exc("ldst0")
 341                         if mmu is not None:
 342                             with m.If(fetch_failed):
 343                                 # instruction fetch: exception is from MMU
 344                                 # reset instr_fault (highest priority)
 345                                 sync += pdecode2.ldst_exc.eq(mmu)
 346                                 sync += pdecode2.instr_fault.eq(0)
 347                                 if flush_needed:
 348                                     # request icache to stop asserting "failed"
 349                                     comb += core.icache.flush_in.eq(1)
 350                         with m.If(~fetch_failed):
 351                             # otherwise assume it was a LDST exception
 352                             sync += pdecode2.ldst_exc.eq(ldst)
 353
 354                     with m.If(exec_pc_o_valid):
 355
 356                         # return directly to Decode if Execute generated an
 357                         # exception.
 358                         with m.If(pdecode2.ldst_exc.happened):
 359                             m.next = "DECODE_SV"
 360
 361                         # if MSR, PC or SVSTATE were changed by the previous
 362                         # instruction, go directly back to Fetch, without
 363                         # updating either MSR PC or SVSTATE
 364                         with m.Elif(self.msr_changed | self.pc_changed |
 365                                     self.sv_changed):
 366                             m.next = "ISSUE_START"
 367
 368                         # returning to Execute? then, first update SRCSTEP
 369                         with m.Else():
 370                             # return to mask skip loop
 371                             m.next = "DECODE_SV"
 372
 373                 with m.Else():
 374                     comb += dbg.core_stopped_i.eq(1)
 375                     if flush_needed:
 376                         # request the icache to stop asserting "failed"
 377                         comb += core.icache.flush_in.eq(1)
 378                     # stop instruction fault
 379                     sync += pdecode2.instr_fault.eq(0)
 380                     if flush_needed:
 381                         # request the icache to stop asserting "failed"
 382                         comb += core.icache.flush_in.eq(1)
 383                     # stop instruction fault
 384                     sync += pdecode2.instr_fault.eq(0)
 385
 386     def execute_fsm(self, m, core,
 387                     exec_insn_i_valid, exec_insn_o_ready,
 388                     exec_pc_o_valid, exec_pc_i_ready):
 389         """execute FSM
 390
 391         execute FSM. this interacts with the "issue" FSM
 392         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 393         (outgoing). SVP64 RM prefixes have already been set up by the
 394         "issue" phase, so execute is fairly straightforward.
 395         """
 396
 397         comb = m.d.comb
 398         sync = m.d.sync
 399         pdecode2 = self.pdecode2
 400
 401         # temporaries
 402         core_busy_o = core.n.o_data.busy_o  # core is busy
 403         core_ivalid_i = core.p.i_valid              # instruction is valid
 404
 405         if hasattr(core, "icache"):
 406             fetch_failed = core.icache.i_out.fetch_failed
 407         else:
 408             fetch_failed = Const(0, 1)
 409
 410         with m.FSM(name="exec_fsm"):
 411
 412             # waiting for instruction bus (stays there until not busy)
 413             with m.State("INSN_START"):
 414                 comb += exec_insn_o_ready.eq(1)
 415                 with m.If(exec_insn_i_valid):
 416                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 417                     sync += self.sv_changed.eq(0)
 418                     sync += self.pc_changed.eq(0)
 419                     sync += self.msr_changed.eq(0)
 420                     with m.If(core.p.o_ready):  # only move if accepted
 421                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 422
 423             # instruction started: must wait till it finishes
 424             with m.State("INSN_ACTIVE"):
 425                 # note changes to MSR, PC and SVSTATE
 426                 # XXX oops, really must monitor *all* State Regfile write
 427                 # ports looking for changes!
 428                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
 429                     sync += self.sv_changed.eq(1)
 430                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
 431                     sync += self.msr_changed.eq(1)
 432                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
 433                     sync += self.pc_changed.eq(1)
 434                 with m.If(~core_busy_o):  # instruction done!
 435                     comb += exec_pc_o_valid.eq(1)
 436                     with m.If(exec_pc_i_ready):
 437                         # when finished, indicate "done".
 438                         # however, if there was an exception, the instruction
 439                         # is *not* yet done.  this is an implementation
 440                         # detail: we choose to implement exceptions by
 441                         # taking the exception information from the LDST
 442                         # unit, putting that *back* into the PowerDecoder2,
 443                         # and *re-running the entire instruction*.
 444                         # if we erroneously indicate "done" here, it is as if
 445                         # there were *TWO* instructions:
 446                         # 1) the failed LDST 2) a TRAP.
 447                         with m.If(~pdecode2.ldst_exc.happened &
 448                                   ~fetch_failed):
 449                             comb += self.insn_done.eq(1)
 450                         m.next = "INSN_START"  # back to fetch
 451
 452     def elaborate(self, platform):
 453         m = super().elaborate(platform)
 454         # convenience
 455         comb, sync = m.d.comb, m.d.sync
 456         cur_state = self.cur_state
 457         pdecode2 = self.pdecode2
 458         dbg = self.dbg
 459         core = self.core
 460
 461         # set up peripherals and core
 462         core_rst = self.core_rst
 463
 464         # indicate to outside world if any FU is still executing
 465         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
 466
 467         # address of the next instruction, in the absence of a branch
 468         # depends on the instruction size
 469         nia = Signal(64)
 470
 471         # connect up debug signals
 472         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
 473
 474         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
 475         # issue, decode/execute, now joined by "Predicate fetch/calculate".
 476         # these are the handshake signals between each
 477
 478         # fetch FSM can run as soon as the PC is valid
 479         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
 480         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
 481
 482         # fetch FSM hands over the instruction to be decoded / issued
 483         fetch_insn_o_valid = Signal()
 484         fetch_insn_i_ready = Signal()
 485
 486         # issue FSM delivers the instruction to the be executed
 487         exec_insn_i_valid = Signal()
 488         exec_insn_o_ready = Signal()
 489
 490         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
 491         exec_pc_o_valid = Signal()
 492         exec_pc_i_ready = Signal()
 493
 494         # the FSMs here are perhaps unusual in that they detect conditions
 495         # then "hold" information, combinatorially, for the core
 496         # (as opposed to using sync - which would be on a clock's delay)
 497         # this includes the actual opcode, valid flags and so on.
 498
 499         # Fetch, then predicate fetch, then Issue, then Execute.
 500         # Issue is where the VL for-loop # lives.  the ready/valid
 501         # signalling is used to communicate between the four.
 502
 503         # set up Fetch FSM
 504         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
 505                          self.imem, core_rst, pdecode2, cur_state,
 506                          dbg, core,
 507                          dbg.state.svstate, # combinatorially same
 508                          nia)
 509         m.submodules.fetch = fetch
 510         # connect up in/out data to existing Signals
 511         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
 512         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
 513         # and the ready/valid signalling
 514         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
 515         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
 516         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
 517         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
 518
 519         self.issue_fsm(m, core, nia,
 520                        dbg, core_rst,
 521                        fetch_pc_o_ready, fetch_pc_i_valid,
 522                        fetch_insn_o_valid, fetch_insn_i_ready,
 523                        exec_insn_i_valid, exec_insn_o_ready,
 524                        exec_pc_o_valid, exec_pc_i_ready)
 525
 526         self.execute_fsm(m, core,
 527                          exec_insn_i_valid, exec_insn_o_ready,
 528                          exec_pc_o_valid, exec_pc_i_ready)
 529
 530         return m
 531
 532
 533 if __name__ == '__main__':
 534     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
 535              'spr': 1,
 536              'div': 1,
 537              'mul': 1,
 538              'shiftrot': 1
 539              }
 540     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
 541                          imem_ifacetype='bare_wb',
 542                          addr_wid=48,
 543                          mask_wid=8,
 544                          reg_wid=64,
 545                          units=units)
 546     dut = TestIssuer(pspec)
 547     vl = main(dut, ports=dut.ports(), name="test_issuer")
 548
 549     if len(sys.argv) == 1:
 550         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
 551         with open("test_issuer.il", "w") as f:
 552             f.write(vl)