src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to at provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs, MSR)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
 166     def __init__(self, pspec):
 167
 168         # test is SVP64 is to be enabled
 169         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 170
 171         # and if regfiles are reduced
 172         self.regreduce_en = (hasattr(pspec, "regreduce") and
 173                              (pspec.regreduce == True))
 174
 175         # and if overlap requested
 176         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 177                               (pspec.allow_overlap == True))
 178
 179         # and get the core domain
 180         self.core_domain = "coresync"
 181         if (hasattr(pspec, "core_domain") and
 182             isinstance(pspec.core_domain, str)):
 183             self.core_domain = pspec.core_domain
 184
 185         # JTAG interface.  add this right at the start because if it's
 186         # added it *modifies* the pspec, by adding enable/disable signals
 187         # for parts of the rest of the core
 188         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 189         #self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 190         self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 191         if self.jtag_en:
 192             # XXX MUST keep this up-to-date with litex, and
 193             # soc-cocotb-sim, and err.. all needs sorting out, argh
 194             subset = ['uart',
 195                       'mtwi',
 196                       'eint', 'gpio', 'mspi0',
 197                       # 'mspi1', - disabled for now
 198                       # 'pwm', 'sd0', - disabled for now
 199                       'sdr']
 200             self.jtag = JTAG(get_pinspecs(subset=subset),
 201                              domain=self.dbg_domain)
 202             # add signals to pspec to enable/disable icache and dcache
 203             # (or data and intstruction wishbone if icache/dcache not included)
 204             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 205             # TODO: do we actually care if these are not domain-synchronised?
 206             # honestly probably not.
 207             pspec.wb_icache_en = self.jtag.wb_icache_en
 208             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 209             self.wb_sram_en = self.jtag.wb_sram_en
 210         else:
 211             self.wb_sram_en = Const(1)
 212
 213         # add 4k sram blocks?
 214         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 215                          pspec.sram4x4kblock == True)
 216         if self.sram4x4k:
 217             self.sram4k = []
 218             for i in range(4):
 219                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 220                                                     # features={'err'}
 221                                                     ))
 222
 223         # add interrupt controller?
 224         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 225         if self.xics:
 226             self.xics_icp = XICS_ICP()
 227             self.xics_ics = XICS_ICS()
 228             self.int_level_i = self.xics_ics.int_level_i
 229
 230         # add GPIO peripheral?
 231         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 232         if self.gpio:
 233             self.simple_gpio = SimpleGPIO()
 234             self.gpio_o = self.simple_gpio.gpio_o
 235
 236         # main instruction core.  suitable for prototyping / demo only
 237         self.core = core = NonProductionCore(pspec)
 238         self.core_rst = ResetSignal(self.core_domain)
 239
 240         # instruction decoder.  goes into Trap Record
 241         #pdecode = create_pdecode()
 242         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 243         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 244                                      opkls=IssuerDecode2ToOperand,
 245                                      svp64_en=self.svp64_en,
 246                                      regreduce_en=self.regreduce_en)
 247         pdecode = self.pdecode2.dec
 248
 249         if self.svp64_en:
 250             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 251
 252         self.update_svstate = Signal()  # set this if updating svstate
 253         self.new_svstate = new_svstate = SVSTATERec("new_svstate")
 254
 255         # Test Instruction memory
 256         if hasattr(core, "icache"):
 257             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 258             # truly dreadful.  needs a huge reorg.
 259             pspec.icache = core.icache
 260         self.imem = ConfigFetchUnit(pspec).fu
 261
 262         # DMI interface
 263         self.dbg = CoreDebug()
 264         self.dbg_rst_i = Signal(reset_less=True)
 265
 266         # instruction go/monitor
 267         self.pc_o = Signal(64, reset_less=True)
 268         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 269         self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
 270         self.svstate_i = Data(64, "svstate_i")  # ditto
 271         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 272         self.busy_o = Signal(reset_less=True)
 273         self.memerr_o = Signal(reset_less=True)
 274
 275         # STATE regfile read /write ports for PC, MSR, SVSTATE
 276         staterf = self.core.regs.rf['state']
 277         self.state_r_msr = staterf.r_ports['msr']  # MSR rd
 278         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 279         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 280
 281         self.state_w_msr = staterf.w_ports['msr']  # MSR wr
 282         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 283         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 284
 285         # DMI interface access
 286         intrf = self.core.regs.rf['int']
 287         crrf = self.core.regs.rf['cr']
 288         xerrf = self.core.regs.rf['xer']
 289         self.int_r = intrf.r_ports['dmi']  # INT read
 290         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 291         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 292
 293         if self.svp64_en:
 294             # for predication
 295             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 296             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 297
 298         # hack method of keeping an eye on whether branch/trap set the PC
 299         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 300         self.state_nia.wen.name = 'state_nia_wen'
 301
 302         # pulse to synchronize the simulator at instruction end
 303         self.insn_done = Signal()
 304
 305         # indicate any instruction still outstanding, in execution
 306         self.any_busy = Signal()
 307
 308         if self.svp64_en:
 309             # store copies of predicate masks
 310             self.srcmask = Signal(64)
 311             self.dstmask = Signal(64)
 312
 313     def setup_peripherals(self, m):
 314         comb, sync = m.d.comb, m.d.sync
 315
 316         # okaaaay so the debug module must be in coresync clock domain
 317         # but NOT its reset signal. to cope with this, set every single
 318         # submodule explicitly in coresync domain, debug and JTAG
 319         # in their own one but using *external* reset.
 320         csd = DomainRenamer(self.core_domain)
 321         dbd = DomainRenamer(self.dbg_domain)
 322
 323         m.submodules.core = core = csd(self.core)
 324         # this _so_ needs sorting out.  ICache is added down inside
 325         # LoadStore1 and is already a submodule of LoadStore1
 326         if not isinstance(self.imem, ICache):
 327             m.submodules.imem = imem = csd(self.imem)
 328         m.submodules.dbg = dbg = dbd(self.dbg)
 329         if self.jtag_en:
 330             m.submodules.jtag = jtag = dbd(self.jtag)
 331             # TODO: UART2GDB mux, here, from external pin
 332             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 333             sync += dbg.dmi.connect_to(jtag.dmi)
 334
 335         cur_state = self.cur_state
 336
 337         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
 338         if self.sram4x4k:
 339             for i, sram in enumerate(self.sram4k):
 340                 m.submodules["sram4k_%d" % i] = csd(sram)
 341                 comb += sram.enable.eq(self.wb_sram_en)
 342
 343         # XICS interrupt handler
 344         if self.xics:
 345             m.submodules.xics_icp = icp = csd(self.xics_icp)
 346             m.submodules.xics_ics = ics = csd(self.xics_ics)
 347             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 348             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 349
 350         # GPIO test peripheral
 351         if self.gpio:
 352             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 353
 354         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 355         # XXX causes litex ECP5 test to get wrong idea about input and output
 356         # (but works with verilator sim *sigh*)
 357         # if self.gpio and self.xics:
 358         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 359
 360         # instruction decoder
 361         pdecode = create_pdecode()
 362         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 363         if self.svp64_en:
 364             m.submodules.svp64 = svp64 = csd(self.svp64)
 365
 366         # convenience
 367         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 368         intrf = self.core.regs.rf['int']
 369
 370         # clock delay power-on reset
 371         cd_por = ClockDomain(reset_less=True)
 372         cd_sync = ClockDomain()
 373         m.domains += cd_por, cd_sync
 374         core_sync = ClockDomain(self.core_domain)
 375         if self.core_domain != "sync":
 376             m.domains += core_sync
 377         if self.dbg_domain != "sync":
 378             dbg_sync = ClockDomain(self.dbg_domain)
 379             m.domains += dbg_sync
 380
 381         ti_rst = Signal(reset_less=True)
 382         delay = Signal(range(4), reset=3)
 383         with m.If(delay != 0):
 384             m.d.por += delay.eq(delay - 1)
 385         comb += cd_por.clk.eq(ClockSignal())
 386
 387         # power-on reset delay
 388         core_rst = ResetSignal(self.core_domain)
 389         if self.core_domain != "sync":
 390             comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 391             comb += core_rst.eq(ti_rst)
 392         else:
 393             with m.If(delay != 0 | dbg.core_rst_o):
 394                 comb += core_rst.eq(1)
 395
 396         # connect external reset signal to DMI Reset
 397         if self.dbg_domain != "sync":
 398             dbg_rst = ResetSignal(self.dbg_domain)
 399             comb += dbg_rst.eq(self.dbg_rst_i)
 400
 401         # busy/halted signals from core
 402         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 403         comb += self.busy_o.eq(core_busy_o)
 404         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 405
 406         # temporary hack: says "go" immediately for both address gen and ST
 407         l0 = core.l0
 408         ldst = core.fus.fus['ldst0']
 409         st_go_edge = rising_edge(m, ldst.st.rel_o)
 410         # link addr-go direct to rel
 411         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 412         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 413
 414     def do_dmi(self, m, dbg):
 415         """deals with DMI debug requests
 416
 417         currently only provides read requests for the INT regfile, CR and XER
 418         it will later also deal with *writing* to these regfiles.
 419         """
 420         comb = m.d.comb
 421         sync = m.d.sync
 422         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 423         intrf = self.core.regs.rf['int']
 424
 425         with m.If(d_reg.req):  # request for regfile access being made
 426             # TODO: error-check this
 427             # XXX should this be combinatorial?  sync better?
 428             if intrf.unary:
 429                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 430             else:
 431                 comb += self.int_r.addr.eq(d_reg.addr)
 432                 comb += self.int_r.ren.eq(1)
 433         d_reg_delay = Signal()
 434         sync += d_reg_delay.eq(d_reg.req)
 435         with m.If(d_reg_delay):
 436             # data arrives one clock later
 437             comb += d_reg.data.eq(self.int_r.o_data)
 438             comb += d_reg.ack.eq(1)
 439
 440         # sigh same thing for CR debug
 441         with m.If(d_cr.req):  # request for regfile access being made
 442             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 443         d_cr_delay = Signal()
 444         sync += d_cr_delay.eq(d_cr.req)
 445         with m.If(d_cr_delay):
 446             # data arrives one clock later
 447             comb += d_cr.data.eq(self.cr_r.o_data)
 448             comb += d_cr.ack.eq(1)
 449
 450         # aaand XER...
 451         with m.If(d_xer.req):  # request for regfile access being made
 452             comb += self.xer_r.ren.eq(0b111111)  # enable all
 453         d_xer_delay = Signal()
 454         sync += d_xer_delay.eq(d_xer.req)
 455         with m.If(d_xer_delay):
 456             # data arrives one clock later
 457             comb += d_xer.data.eq(self.xer_r.o_data)
 458             comb += d_xer.ack.eq(1)
 459
 460     def tb_dec_fsm(self, m, spr_dec):
 461         """tb_dec_fsm
 462
 463         this is a FSM for updating either dec or tb.  it runs alternately
 464         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 465         value to DEC, however the regfile has "passthrough" on it so this
 466         *should* be ok.
 467
 468         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
 469         """
 470
 471         comb, sync = m.d.comb, m.d.sync
 472         fast_rf = self.core.regs.rf['fast']
 473         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
 474         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
 475
 476         with m.FSM() as fsm:
 477
 478             # initiates read of current DEC
 479             with m.State("DEC_READ"):
 480                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
 481                 comb += fast_r_dectb.ren.eq(1)
 482                 m.next = "DEC_WRITE"
 483
 484             # waits for DEC read to arrive (1 cycle), updates with new value
 485             with m.State("DEC_WRITE"):
 486                 new_dec = Signal(64)
 487                 # TODO: MSR.LPCR 32-bit decrement mode
 488                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
 489                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
 490                 comb += fast_w_dectb.wen.eq(1)
 491                 comb += fast_w_dectb.i_data.eq(new_dec)
 492                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
 493                 m.next = "TB_READ"
 494
 495             # initiates read of current TB
 496             with m.State("TB_READ"):
 497                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
 498                 comb += fast_r_dectb.ren.eq(1)
 499                 m.next = "TB_WRITE"
 500
 501             # waits for read TB to arrive, initiates write of current TB
 502             with m.State("TB_WRITE"):
 503                 new_tb = Signal(64)
 504                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
 505                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
 506                 comb += fast_w_dectb.wen.eq(1)
 507                 comb += fast_w_dectb.i_data.eq(new_tb)
 508                 m.next = "DEC_READ"
 509
 510         return m
 511
 512     def elaborate(self, platform):
 513         m = Module()
 514         # convenience
 515         comb, sync = m.d.comb, m.d.sync
 516         cur_state = self.cur_state
 517         pdecode2 = self.pdecode2
 518         dbg = self.dbg
 519
 520         # set up peripherals and core
 521         core_rst = self.core_rst
 522         self.setup_peripherals(m)
 523
 524         # reset current state if core reset requested
 525         with m.If(core_rst):
 526             m.d.sync += self.cur_state.eq(0)
 527
 528         # PC and instruction from I-Memory
 529         comb += self.pc_o.eq(cur_state.pc)
 530         self.pc_changed = Signal()  # note write to PC
 531         self.msr_changed = Signal()  # note write to MSR
 532         self.sv_changed = Signal()  # note write to SVSTATE
 533
 534         # read state either from incoming override or from regfile
 535         state = CoreState("get")  # current state (MSR/PC/SVSTATE)
 536         state_get(m, state.msr, core_rst, self.msr_i,
 537                        "msr",                  # read MSR
 538                        self.state_r_msr, StateRegs.MSR)
 539         state_get(m, state.pc, core_rst, self.pc_i,
 540                        "pc",                  # read PC
 541                        self.state_r_pc, StateRegs.PC)
 542         state_get(m, state.svstate, core_rst, self.svstate_i,
 543                             "svstate",   # read SVSTATE
 544                             self.state_r_sv, StateRegs.SVSTATE)
 545
 546         # don't write pc every cycle
 547         comb += self.state_w_pc.wen.eq(0)
 548         comb += self.state_w_pc.i_data.eq(0)
 549
 550         # connect up debug state.  note "combinatorially same" below,
 551         # this is a bit naff, passing state over in the dbg class, but
 552         # because it is combinatorial it achieves the desired goal
 553         comb += dbg.state.eq(state)
 554
 555         # this bit doesn't have to be in the FSM: connect up to read
 556         # regfiles on demand from DMI
 557         self.do_dmi(m, dbg)
 558
 559         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
 560         # (which uses that in PowerDecoder2 to raise 0x900 exception)
 561         self.tb_dec_fsm(m, cur_state.dec)
 562
 563         # while stopped, allow updating the MSR, PC and SVSTATE.
 564         # these are mainly for debugging purposes (including DMI/JTAG)
 565         with m.If(dbg.core_stopped_i):
 566             with m.If(self.pc_i.ok):
 567                 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 568                 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 569                 sync += self.pc_changed.eq(1)
 570             with m.If(self.msr_i.ok):
 571                 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
 572                 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
 573                 sync += self.msr_changed.eq(1)
 574             with m.If(self.svstate_i.ok | self.update_svstate):
 575                 with m.If(self.svstate_i.ok): # over-ride from external source
 576                     comb += self.new_svstate.eq(self.svstate_i.data)
 577                 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 578                 comb += self.state_w_sv.i_data.eq(self.new_svstate)
 579                 sync += self.sv_changed.eq(1)
 580
 581         return m
 582
 583     def __iter__(self):
 584         yield from self.pc_i.ports()
 585         yield from self.msr_i.ports()
 586         yield self.pc_o
 587         yield self.memerr_o
 588         yield from self.core.ports()
 589         yield from self.imem.ports()
 590         yield self.core_bigendian_i
 591         yield self.busy_o
 592
 593     def ports(self):
 594         return list(self)
 595
 596     def external_ports(self):
 597         ports = self.pc_i.ports()
 598         ports = self.msr_i.ports()
 599         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 600                   ]
 601
 602         if self.jtag_en:
 603             ports += list(self.jtag.external_ports())
 604         else:
 605             # don't add DMI if JTAG is enabled
 606             ports += list(self.dbg.dmi.ports())
 607
 608         ports += list(self.imem.ibus.fields.values())
 609         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 610
 611         if self.sram4x4k:
 612             for sram in self.sram4k:
 613                 ports += list(sram.bus.fields.values())
 614
 615         if self.xics:
 616             ports += list(self.xics_icp.bus.fields.values())
 617             ports += list(self.xics_ics.bus.fields.values())
 618             ports.append(self.int_level_i)
 619
 620         if self.gpio:
 621             ports += list(self.simple_gpio.bus.fields.values())
 622             ports.append(self.gpio_o)
 623
 624         return ports
 625
 626     def ports(self):
 627         return list(self)
 628
 629
 630
 631 # Fetch Finite State Machine.
 632 # WARNING: there are currently DriverConflicts but it's actually working.
 633 # TODO, here: everything that is global in nature, information from the
 634 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 635 # not only that: TestIssuerInternal.imem can entirely move into here
 636 # because imem is only ever accessed inside the FetchFSM.
 637 class FetchFSM(ControlBase):
 638     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 639                  pdecode2, cur_state,
 640                  dbg, core, svstate, nia, is_svp64_mode):
 641         self.allow_overlap = allow_overlap
 642         self.svp64_en = svp64_en
 643         self.imem = imem
 644         self.core_rst = core_rst
 645         self.pdecode2 = pdecode2
 646         self.cur_state = cur_state
 647         self.dbg = dbg
 648         self.core = core
 649         self.svstate = svstate
 650         self.nia = nia
 651         self.is_svp64_mode = is_svp64_mode
 652
 653         # set up pipeline ControlBase and allocate i/o specs
 654         # (unusual: normally done by the Pipeline API)
 655         super().__init__(stage=self)
 656         self.p.i_data, self.n.o_data = self.new_specs(None)
 657         self.i, self.o = self.p.i_data, self.n.o_data
 658
 659     # next 3 functions are Stage API Compliance
 660     def setup(self, m, i):
 661         pass
 662
 663     def ispec(self):
 664         return FetchInput()
 665
 666     def ospec(self):
 667         return FetchOutput()
 668
 669     def elaborate(self, platform):
 670         """fetch FSM
 671
 672         this FSM performs fetch of raw instruction data, partial-decodes
 673         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 674         read a 2nd 32-bit quantity if that occurs.
 675         """
 676         m = super().elaborate(platform)
 677
 678         dbg = self.dbg
 679         core = self.core
 680         pc = self.i.pc
 681         msr = self.i.msr
 682         svstate = self.svstate
 683         nia = self.nia
 684         is_svp64_mode = self.is_svp64_mode
 685         fetch_pc_o_ready = self.p.o_ready
 686         fetch_pc_i_valid = self.p.i_valid
 687         fetch_insn_o_valid = self.n.o_valid
 688         fetch_insn_i_ready = self.n.i_ready
 689
 690         comb = m.d.comb
 691         sync = m.d.sync
 692         pdecode2 = self.pdecode2
 693         cur_state = self.cur_state
 694         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 695
 696         # also note instruction fetch failed
 697         if hasattr(core, "icache"):
 698             fetch_failed = core.icache.i_out.fetch_failed
 699             flush_needed = True
 700         else:
 701             fetch_failed = Const(0, 1)
 702             flush_needed = False
 703
 704         # set priv / virt mode on I-Cache, sigh
 705         if isinstance(self.imem, ICache):
 706             comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
 707             comb += self.imem.i_in.virt_mode.eq(msr[MSR.DR])
 708
 709         with m.FSM(name='fetch_fsm'):
 710
 711             # waiting (zzz)
 712             with m.State("IDLE"):
 713                 with m.If(~dbg.stopping_o & ~fetch_failed):
 714                     comb += fetch_pc_o_ready.eq(1)
 715                 with m.If(fetch_pc_i_valid & ~pdecode2.instr_fault):
 716                     # instruction allowed to go: start by reading the PC
 717                     # capture the PC and also drop it into Insn Memory
 718                     # we have joined a pair of combinatorial memory
 719                     # lookups together.  this is Generally Bad.
 720                     comb += self.imem.a_pc_i.eq(pc)
 721                     comb += self.imem.a_i_valid.eq(1)
 722                     comb += self.imem.f_i_valid.eq(1)
 723                     # transfer state to output
 724                     sync += cur_state.pc.eq(pc)
 725                     sync += cur_state.svstate.eq(svstate)  # and svstate
 726                     sync += cur_state.msr.eq(msr)  # and msr
 727
 728                     m.next = "INSN_READ"  # move to "wait for bus" phase
 729
 730             # dummy pause to find out why simulation is not keeping up
 731             with m.State("INSN_READ"):
 732                 if self.allow_overlap:
 733                     stopping = dbg.stopping_o
 734                 else:
 735                     stopping = Const(0)
 736                 with m.If(stopping):
 737                     # stopping: jump back to idle
 738                     m.next = "IDLE"
 739                 with m.Else():
 740                     with m.If(self.imem.f_busy_o &
 741                               ~pdecode2.instr_fault):  # zzz...
 742                         # busy but not fetch failed: stay in wait-read
 743                         comb += self.imem.a_i_valid.eq(1)
 744                         comb += self.imem.f_i_valid.eq(1)
 745                     with m.Else():
 746                         # not busy (or fetch failed!): instruction fetched
 747                         # when fetch failed, the instruction gets ignored
 748                         # by the decoder
 749                         if hasattr(core, "icache"):
 750                             # blech, icache returns actual instruction
 751                             insn = self.imem.f_instr_o
 752                         else:
 753                             # but these return raw memory
 754                             insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 755                         if self.svp64_en:
 756                             svp64 = self.svp64
 757                             # decode the SVP64 prefix, if any
 758                             comb += svp64.raw_opcode_in.eq(insn)
 759                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 760                             # pass the decoded prefix (if any) to PowerDecoder2
 761                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 762                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 763                             # remember whether this is a prefixed instruction,
 764                             # so the FSM can readily loop when VL==0
 765                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 766                             # calculate the address of the following instruction
 767                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 768                             sync += nia.eq(cur_state.pc + insn_size)
 769                             with m.If(~svp64.is_svp64_mode):
 770                                 # with no prefix, store the instruction
 771                                 # and hand it directly to the next FSM
 772                                 sync += dec_opcode_o.eq(insn)
 773                                 m.next = "INSN_READY"
 774                             with m.Else():
 775                                 # fetch the rest of the instruction from memory
 776                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 777                                 comb += self.imem.a_i_valid.eq(1)
 778                                 comb += self.imem.f_i_valid.eq(1)
 779                                 m.next = "INSN_READ2"
 780                         else:
 781                             # not SVP64 - 32-bit only
 782                             sync += nia.eq(cur_state.pc + 4)
 783                             sync += dec_opcode_o.eq(insn)
 784                             m.next = "INSN_READY"
 785
 786             with m.State("INSN_READ2"):
 787                 with m.If(self.imem.f_busy_o):  # zzz...
 788                     # busy: stay in wait-read
 789                     comb += self.imem.a_i_valid.eq(1)
 790                     comb += self.imem.f_i_valid.eq(1)
 791                 with m.Else():
 792                     # not busy: instruction fetched
 793                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 794                     sync += dec_opcode_o.eq(insn)
 795                     m.next = "INSN_READY"
 796                     # TODO: probably can start looking at pdecode2.rm_dec
 797                     # here or maybe even in INSN_READ state, if svp64_mode
 798                     # detected, in order to trigger - and wait for - the
 799                     # predicate reading.
 800                     if self.svp64_en:
 801                         pmode = pdecode2.rm_dec.predmode
 802                     """
 803                     if pmode != SVP64PredMode.ALWAYS.value:
 804                         fire predicate loading FSM and wait before
 805                         moving to INSN_READY
 806                     else:
 807                         sync += self.srcmask.eq(-1) # set to all 1s
 808                         sync += self.dstmask.eq(-1) # set to all 1s
 809                         m.next = "INSN_READY"
 810                     """
 811
 812             with m.State("INSN_READY"):
 813                 # hand over the instruction, to be decoded
 814                 comb += fetch_insn_o_valid.eq(1)
 815                 with m.If(fetch_insn_i_ready):
 816                     m.next = "IDLE"
 817
 818         # whatever was done above, over-ride it if core reset is held
 819         with m.If(self.core_rst):
 820             sync += nia.eq(0)
 821
 822         return m
 823
 824
 825 class TestIssuerInternal(TestIssuerBase):
 826     """TestIssuer - reads instructions from TestMemory and issues them
 827
 828     efficiency and speed is not the main goal here: functional correctness
 829     and code clarity is.  optimisations (which almost 100% interfere with
 830     easy understanding) come later.
 831     """
 832
 833     def fetch_predicate_fsm(self, m,
 834                             pred_insn_i_valid, pred_insn_o_ready,
 835                             pred_mask_o_valid, pred_mask_i_ready):
 836         """fetch_predicate_fsm - obtains (constructs in the case of CR)
 837            src/dest predicate masks
 838
 839         https://bugs.libre-soc.org/show_bug.cgi?id=617
 840         the predicates can be read here, by using IntRegs r_ports['pred']
 841         or CRRegs r_ports['pred'].  in the case of CRs it will have to
 842         be done through multiple reads, extracting one relevant at a time.
 843         later, a faster way would be to use the 32-bit-wide CR port but
 844         this is more complex decoding, here.  equivalent code used in
 845         ISACaller is "from openpower.decoder.isa.caller import get_predcr"
 846
 847         note: this ENTIRE FSM is not to be called when svp64 is disabled
 848         """
 849         comb = m.d.comb
 850         sync = m.d.sync
 851         pdecode2 = self.pdecode2
 852         rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
 853         predmode = rm_dec.predmode
 854         srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
 855         cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
 856         # get src/dst step, so we can skip already used mask bits
 857         cur_state = self.cur_state
 858         srcstep = cur_state.svstate.srcstep
 859         dststep = cur_state.svstate.dststep
 860         cur_vl = cur_state.svstate.vl
 861
 862         # decode predicates
 863         sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
 864         dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
 865         sidx, scrinvert = get_predcr(m, srcpred, 's')
 866         didx, dcrinvert = get_predcr(m, dstpred, 'd')
 867
 868         # store fetched masks, for either intpred or crpred
 869         # when src/dst step is not zero, the skipped mask bits need to be
 870         # shifted-out, before actually storing them in src/dest mask
 871         new_srcmask = Signal(64, reset_less=True)
 872         new_dstmask = Signal(64, reset_less=True)
 873
 874         with m.FSM(name="fetch_predicate"):
 875
 876             with m.State("FETCH_PRED_IDLE"):
 877                 comb += pred_insn_o_ready.eq(1)
 878                 with m.If(pred_insn_i_valid):
 879                     with m.If(predmode == SVP64PredMode.INT):
 880                         # skip fetching destination mask register, when zero
 881                         with m.If(dall1s):
 882                             sync += new_dstmask.eq(-1)
 883                             # directly go to fetch source mask register
 884                             # guaranteed not to be zero (otherwise predmode
 885                             # would be SVP64PredMode.ALWAYS, not INT)
 886                             comb += int_pred.addr.eq(sregread)
 887                             comb += int_pred.ren.eq(1)
 888                             m.next = "INT_SRC_READ"
 889                         # fetch destination predicate register
 890                         with m.Else():
 891                             comb += int_pred.addr.eq(dregread)
 892                             comb += int_pred.ren.eq(1)
 893                             m.next = "INT_DST_READ"
 894                     with m.Elif(predmode == SVP64PredMode.CR):
 895                         # go fetch masks from the CR register file
 896                         sync += new_srcmask.eq(0)
 897                         sync += new_dstmask.eq(0)
 898                         m.next = "CR_READ"
 899                     with m.Else():
 900                         sync += self.srcmask.eq(-1)
 901                         sync += self.dstmask.eq(-1)
 902                         m.next = "FETCH_PRED_DONE"
 903
 904             with m.State("INT_DST_READ"):
 905                 # store destination mask
 906                 inv = Repl(dinvert, 64)
 907                 with m.If(dunary):
 908                     # set selected mask bit for 1<<r3 mode
 909                     dst_shift = Signal(range(64))
 910                     comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
 911                     sync += new_dstmask.eq(1 << dst_shift)
 912                 with m.Else():
 913                     # invert mask if requested
 914                     sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
 915                 # skip fetching source mask register, when zero
 916                 with m.If(sall1s):
 917                     sync += new_srcmask.eq(-1)
 918                     m.next = "FETCH_PRED_SHIFT_MASK"
 919                 # fetch source predicate register
 920                 with m.Else():
 921                     comb += int_pred.addr.eq(sregread)
 922                     comb += int_pred.ren.eq(1)
 923                     m.next = "INT_SRC_READ"
 924
 925             with m.State("INT_SRC_READ"):
 926                 # store source mask
 927                 inv = Repl(sinvert, 64)
 928                 with m.If(sunary):
 929                     # set selected mask bit for 1<<r3 mode
 930                     src_shift = Signal(range(64))
 931                     comb += src_shift.eq(self.int_pred.o_data & 0b111111)
 932                     sync += new_srcmask.eq(1 << src_shift)
 933                 with m.Else():
 934                     # invert mask if requested
 935                     sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
 936                 m.next = "FETCH_PRED_SHIFT_MASK"
 937
 938             # fetch masks from the CR register file
 939             # implements the following loop:
 940             # idx, inv = get_predcr(mask)
 941             # mask = 0
 942             # for cr_idx in range(vl):
 943             #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
 944             #     if cr[idx] ^ inv:
 945             #         mask |= 1 << cr_idx
 946             # return mask
 947             with m.State("CR_READ"):
 948                 # CR index to be read, which will be ready by the next cycle
 949                 cr_idx = Signal.like(cur_vl, reset_less=True)
 950                 # submit the read operation to the regfile
 951                 with m.If(cr_idx != cur_vl):
 952                     # the CR read port is unary ...
 953                     # ren = 1 << cr_idx
 954                     # ... in MSB0 convention ...
 955                     # ren = 1 << (7 - cr_idx)
 956                     # ... and with an offset:
 957                     # ren = 1 << (7 - off - cr_idx)
 958                     idx = SVP64CROffs.CRPred + cr_idx
 959                     comb += cr_pred.ren.eq(1 << (7 - idx))
 960                     # signal data valid in the next cycle
 961                     cr_read = Signal(reset_less=True)
 962                     sync += cr_read.eq(1)
 963                     # load the next index
 964                     sync += cr_idx.eq(cr_idx + 1)
 965                 with m.Else():
 966                     # exit on loop end
 967                     sync += cr_read.eq(0)
 968                     sync += cr_idx.eq(0)
 969                     m.next = "FETCH_PRED_SHIFT_MASK"
 970                 with m.If(cr_read):
 971                     # compensate for the one cycle delay on the regfile
 972                     cur_cr_idx = Signal.like(cur_vl)
 973                     comb += cur_cr_idx.eq(cr_idx - 1)
 974                     # read the CR field, select the appropriate bit
 975                     cr_field = Signal(4)
 976                     scr_bit = Signal()
 977                     dcr_bit = Signal()
 978                     comb += cr_field.eq(cr_pred.o_data)
 979                     comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
 980                                        ^ scrinvert)
 981                     comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
 982                                        ^ dcrinvert)
 983                     # set the corresponding mask bit
 984                     bit_to_set = Signal.like(self.srcmask)
 985                     comb += bit_to_set.eq(1 << cur_cr_idx)
 986                     with m.If(scr_bit):
 987                         sync += new_srcmask.eq(new_srcmask | bit_to_set)
 988                     with m.If(dcr_bit):
 989                         sync += new_dstmask.eq(new_dstmask | bit_to_set)
 990
 991             with m.State("FETCH_PRED_SHIFT_MASK"):
 992                 # shift-out skipped mask bits
 993                 sync += self.srcmask.eq(new_srcmask >> srcstep)
 994                 sync += self.dstmask.eq(new_dstmask >> dststep)
 995                 m.next = "FETCH_PRED_DONE"
 996
 997             with m.State("FETCH_PRED_DONE"):
 998                 comb += pred_mask_o_valid.eq(1)
 999                 with m.If(pred_mask_i_ready):
1000                     m.next = "FETCH_PRED_IDLE"
1001
1002     def issue_fsm(self, m, core, nia,
1003                   dbg, core_rst, is_svp64_mode,
1004                   fetch_pc_o_ready, fetch_pc_i_valid,
1005                   fetch_insn_o_valid, fetch_insn_i_ready,
1006                   pred_insn_i_valid, pred_insn_o_ready,
1007                   pred_mask_o_valid, pred_mask_i_ready,
1008                   exec_insn_i_valid, exec_insn_o_ready,
1009                   exec_pc_o_valid, exec_pc_i_ready):
1010         """issue FSM
1011
1012         decode / issue FSM.  this interacts with the "fetch" FSM
1013         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
1014         (outgoing). also interacts with the "execute" FSM
1015         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
1016         (incoming).
1017         SVP64 RM prefixes have already been set up by the
1018         "fetch" phase, so execute is fairly straightforward.
1019         """
1020
1021         comb = m.d.comb
1022         sync = m.d.sync
1023         pdecode2 = self.pdecode2
1024         cur_state = self.cur_state
1025         new_svstate = self.new_svstate
1026
1027         # temporaries
1028         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
1029
1030         # for updating svstate (things like srcstep etc.)
1031         comb += new_svstate.eq(cur_state.svstate)
1032
1033         # precalculate srcstep+1 and dststep+1
1034         cur_srcstep = cur_state.svstate.srcstep
1035         cur_dststep = cur_state.svstate.dststep
1036         next_srcstep = Signal.like(cur_srcstep)
1037         next_dststep = Signal.like(cur_dststep)
1038         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
1039         comb += next_dststep.eq(cur_state.svstate.dststep+1)
1040
1041         # note if an exception happened.  in a pipelined or OoO design
1042         # this needs to be accompanied by "shadowing" (or stalling)
1043         exc_happened = self.core.o.exc_happened
1044         # also note instruction fetch failed
1045         if hasattr(core, "icache"):
1046             fetch_failed = core.icache.i_out.fetch_failed
1047             flush_needed = True
1048             # set to fault in decoder
1049             # update (highest priority) instruction fault
1050             rising_fetch_failed = rising_edge(m, fetch_failed)
1051             with m.If(rising_fetch_failed):
1052                 sync += pdecode2.instr_fault.eq(1)
1053         else:
1054             fetch_failed = Const(0, 1)
1055             flush_needed = False
1056
1057         with m.FSM(name="issue_fsm"):
1058
1059             # sync with the "fetch" phase which is reading the instruction
1060             # at this point, there is no instruction running, that
1061             # could inadvertently update the PC.
1062             with m.State("ISSUE_START"):
1063                 # reset instruction fault
1064                 sync += pdecode2.instr_fault.eq(0)
1065                 # wait on "core stop" release, before next fetch
1066                 # need to do this here, in case we are in a VL==0 loop
1067                 with m.If(~dbg.core_stop_o & ~core_rst):
1068                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
1069                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
1070                         m.next = "INSN_WAIT"
1071                 with m.Else():
1072                     # tell core it's stopped, and acknowledge debug handshake
1073                     comb += dbg.core_stopped_i.eq(1)
1074                     # while stopped, allow updating SVSTATE
1075                     with m.If(self.svstate_i.ok):
1076                         comb += new_svstate.eq(self.svstate_i.data)
1077                         comb += self.update_svstate.eq(1)
1078                         sync += self.sv_changed.eq(1)
1079
1080             # wait for an instruction to arrive from Fetch
1081             with m.State("INSN_WAIT"):
1082                 if self.allow_overlap:
1083                     stopping = dbg.stopping_o
1084                 else:
1085                     stopping = Const(0)
1086                 with m.If(stopping):
1087                     # stopping: jump back to idle
1088                     m.next = "ISSUE_START"
1089                     if flush_needed:
1090                         # request the icache to stop asserting "failed"
1091                         comb += core.icache.flush_in.eq(1)
1092                     # stop instruction fault
1093                     sync += pdecode2.instr_fault.eq(0)
1094                 with m.Else():
1095                     comb += fetch_insn_i_ready.eq(1)
1096                     with m.If(fetch_insn_o_valid):
1097                         # loop into ISSUE_START if it's a SVP64 instruction
1098                         # and VL == 0.  this because VL==0 is a for-loop
1099                         # from 0 to 0 i.e. always, always a NOP.
1100                         cur_vl = cur_state.svstate.vl
1101                         with m.If(is_svp64_mode & (cur_vl == 0)):
1102                             # update the PC before fetching the next instruction
1103                             # since we are in a VL==0 loop, no instruction was
1104                             # executed that we could be overwriting
1105                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1106                             comb += self.state_w_pc.i_data.eq(nia)
1107                             comb += self.insn_done.eq(1)
1108                             m.next = "ISSUE_START"
1109                         with m.Else():
1110                             if self.svp64_en:
1111                                 m.next = "PRED_START"  # fetching predicate
1112                             else:
1113                                 m.next = "DECODE_SV"  # skip predication
1114
1115             with m.State("PRED_START"):
1116                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
1117                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
1118                     m.next = "MASK_WAIT"
1119
1120             with m.State("MASK_WAIT"):
1121                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
1122                 with m.If(pred_mask_o_valid):  # predication masks are ready
1123                     m.next = "PRED_SKIP"
1124
1125             # skip zeros in predicate
1126             with m.State("PRED_SKIP"):
1127                 with m.If(~is_svp64_mode):
1128                     m.next = "DECODE_SV"  # nothing to do
1129                 with m.Else():
1130                     if self.svp64_en:
1131                         pred_src_zero = pdecode2.rm_dec.pred_sz
1132                         pred_dst_zero = pdecode2.rm_dec.pred_dz
1133
1134                         # new srcstep, after skipping zeros
1135                         skip_srcstep = Signal.like(cur_srcstep)
1136                         # value to be added to the current srcstep
1137                         src_delta = Signal.like(cur_srcstep)
1138                         # add leading zeros to srcstep, if not in zero mode
1139                         with m.If(~pred_src_zero):
1140                             # priority encoder (count leading zeros)
1141                             # append guard bit, in case the mask is all zeros
1142                             pri_enc_src = PriorityEncoder(65)
1143                             m.submodules.pri_enc_src = pri_enc_src
1144                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
1145                                                          Const(1, 1)))
1146                             comb += src_delta.eq(pri_enc_src.o)
1147                         # apply delta to srcstep
1148                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
1149                         # shift-out all leading zeros from the mask
1150                         # plus the leading "one" bit
1151                         # TODO count leading zeros and shift-out the zero
1152                         #      bits, in the same step, in hardware
1153                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1154
1155                         # same as above, but for dststep
1156                         skip_dststep = Signal.like(cur_dststep)
1157                         dst_delta = Signal.like(cur_dststep)
1158                         with m.If(~pred_dst_zero):
1159                             pri_enc_dst = PriorityEncoder(65)
1160                             m.submodules.pri_enc_dst = pri_enc_dst
1161                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1162                                                          Const(1, 1)))
1163                             comb += dst_delta.eq(pri_enc_dst.o)
1164                         comb += skip_dststep.eq(cur_dststep + dst_delta)
1165                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1166
1167                         # TODO: initialize mask[VL]=1 to avoid passing past VL
1168                         with m.If((skip_srcstep >= cur_vl) |
1169                                   (skip_dststep >= cur_vl)):
1170                             # end of VL loop. Update PC and reset src/dst step
1171                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1172                             comb += self.state_w_pc.i_data.eq(nia)
1173                             comb += new_svstate.srcstep.eq(0)
1174                             comb += new_svstate.dststep.eq(0)
1175                             comb += self.update_svstate.eq(1)
1176                             # synchronize with the simulator
1177                             comb += self.insn_done.eq(1)
1178                             # go back to Issue
1179                             m.next = "ISSUE_START"
1180                         with m.Else():
1181                             # update new src/dst step
1182                             comb += new_svstate.srcstep.eq(skip_srcstep)
1183                             comb += new_svstate.dststep.eq(skip_dststep)
1184                             comb += self.update_svstate.eq(1)
1185                             # proceed to Decode
1186                             m.next = "DECODE_SV"
1187
1188                         # pass predicate mask bits through to satellite decoders
1189                         # TODO: for SIMD this will be *multiple* bits
1190                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1191                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1192
1193             # after src/dst step have been updated, we are ready
1194             # to decode the instruction
1195             with m.State("DECODE_SV"):
1196                 # decode the instruction
1197                 with m.If(~fetch_failed):
1198                     sync += pdecode2.instr_fault.eq(0)
1199                 sync += core.i.e.eq(pdecode2.e)
1200                 sync += core.i.state.eq(cur_state)
1201                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1202                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1203                 if self.svp64_en:
1204                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1205                     # set RA_OR_ZERO detection in satellite decoders
1206                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1207                     # and svp64 detection
1208                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1209                     # and svp64 bit-rev'd ldst mode
1210                     ldst_dec = pdecode2.use_svp64_ldst_dec
1211                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1212                 # after decoding, reset any previous exception condition,
1213                 # allowing it to be set again during the next execution
1214                 sync += pdecode2.ldst_exc.eq(0)
1215
1216                 m.next = "INSN_EXECUTE"  # move to "execute"
1217
1218             # handshake with execution FSM, move to "wait" once acknowledged
1219             with m.State("INSN_EXECUTE"):
1220                 comb += exec_insn_i_valid.eq(1)  # trigger execute
1221                 with m.If(exec_insn_o_ready):   # execute acknowledged us
1222                     m.next = "EXECUTE_WAIT"
1223
1224             with m.State("EXECUTE_WAIT"):
1225                 # wait on "core stop" release, at instruction end
1226                 # need to do this here, in case we are in a VL>1 loop
1227                 with m.If(~dbg.core_stop_o & ~core_rst):
1228                     comb += exec_pc_i_ready.eq(1)
1229                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1230                     # the exception info needs to be blatted into
1231                     # pdecode.ldst_exc, and the instruction "re-run".
1232                     # when ldst_exc.happened is set, the PowerDecoder2
1233                     # reacts very differently: it re-writes the instruction
1234                     # with a "trap" (calls PowerDecoder2.trap()) which
1235                     # will *overwrite* whatever was requested and jump the
1236                     # PC to the exception address, as well as alter MSR.
1237                     # nothing else needs to be done other than to note
1238                     # the change of PC and MSR (and, later, SVSTATE)
1239                     with m.If(exc_happened):
1240                         mmu = core.fus.get_exc("mmu0")
1241                         ldst = core.fus.get_exc("ldst0")
1242                         if mmu is not None:
1243                             with m.If(fetch_failed):
1244                                 # instruction fetch: exception is from MMU
1245                                 # reset instr_fault (highest priority)
1246                                 sync += pdecode2.ldst_exc.eq(mmu)
1247                                 sync += pdecode2.instr_fault.eq(0)
1248                                 if flush_needed:
1249                                     # request icache to stop asserting "failed"
1250                                     comb += core.icache.flush_in.eq(1)
1251                         with m.If(~fetch_failed):
1252                             # otherwise assume it was a LDST exception
1253                             sync += pdecode2.ldst_exc.eq(ldst)
1254
1255                     with m.If(exec_pc_o_valid):
1256
1257                         # was this the last loop iteration?
1258                         is_last = Signal()
1259                         cur_vl = cur_state.svstate.vl
1260                         comb += is_last.eq(next_srcstep == cur_vl)
1261
1262                         with m.If(pdecode2.instr_fault):
1263                             # reset instruction fault, try again
1264                             sync += pdecode2.instr_fault.eq(0)
1265                             m.next = "ISSUE_START"
1266
1267                         # return directly to Decode if Execute generated an
1268                         # exception.
1269                         with m.Elif(pdecode2.ldst_exc.happened):
1270                             m.next = "DECODE_SV"
1271
1272                         # if MSR, PC or SVSTATE were changed by the previous
1273                         # instruction, go directly back to Fetch, without
1274                         # updating either MSR PC or SVSTATE
1275                         with m.Elif(self.msr_changed | self.pc_changed |
1276                                     self.sv_changed):
1277                             m.next = "ISSUE_START"
1278
1279                         # also return to Fetch, when no output was a vector
1280                         # (regardless of SRCSTEP and VL), or when the last
1281                         # instruction was really the last one of the VL loop
1282                         with m.Elif((~pdecode2.loop_continue) | is_last):
1283                             # before going back to fetch, update the PC state
1284                             # register with the NIA.
1285                             # ok here we are not reading the branch unit.
1286                             # TODO: this just blithely overwrites whatever
1287                             #       pipeline updated the PC
1288                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1289                             comb += self.state_w_pc.i_data.eq(nia)
1290                             # reset SRCSTEP before returning to Fetch
1291                             if self.svp64_en:
1292                                 with m.If(pdecode2.loop_continue):
1293                                     comb += new_svstate.srcstep.eq(0)
1294                                     comb += new_svstate.dststep.eq(0)
1295                                     comb += self.update_svstate.eq(1)
1296                             else:
1297                                 comb += new_svstate.srcstep.eq(0)
1298                                 comb += new_svstate.dststep.eq(0)
1299                                 comb += self.update_svstate.eq(1)
1300                             m.next = "ISSUE_START"
1301
1302                         # returning to Execute? then, first update SRCSTEP
1303                         with m.Else():
1304                             comb += new_svstate.srcstep.eq(next_srcstep)
1305                             comb += new_svstate.dststep.eq(next_dststep)
1306                             comb += self.update_svstate.eq(1)
1307                             # return to mask skip loop
1308                             m.next = "PRED_SKIP"
1309
1310                 with m.Else():
1311                     comb += dbg.core_stopped_i.eq(1)
1312                     if flush_needed:
1313                         # request the icache to stop asserting "failed"
1314                         comb += core.icache.flush_in.eq(1)
1315                     # stop instruction fault
1316                     sync += pdecode2.instr_fault.eq(0)
1317
1318         # check if svstate needs updating: if so, write it to State Regfile
1319         with m.If(self.update_svstate):
1320             sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1321
1322     def execute_fsm(self, m, core,
1323                     exec_insn_i_valid, exec_insn_o_ready,
1324                     exec_pc_o_valid, exec_pc_i_ready):
1325         """execute FSM
1326
1327         execute FSM. this interacts with the "issue" FSM
1328         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1329         (outgoing). SVP64 RM prefixes have already been set up by the
1330         "issue" phase, so execute is fairly straightforward.
1331         """
1332
1333         comb = m.d.comb
1334         sync = m.d.sync
1335         pdecode2 = self.pdecode2
1336
1337         # temporaries
1338         core_busy_o = core.n.o_data.busy_o  # core is busy
1339         core_ivalid_i = core.p.i_valid              # instruction is valid
1340
1341         if hasattr(core, "icache"):
1342             fetch_failed = core.icache.i_out.fetch_failed
1343         else:
1344             fetch_failed = Const(0, 1)
1345
1346         with m.FSM(name="exec_fsm"):
1347
1348             # waiting for instruction bus (stays there until not busy)
1349             with m.State("INSN_START"):
1350                 comb += exec_insn_o_ready.eq(1)
1351                 with m.If(exec_insn_i_valid):
1352                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1353                     sync += self.sv_changed.eq(0)
1354                     sync += self.pc_changed.eq(0)
1355                     sync += self.msr_changed.eq(0)
1356                     with m.If(core.p.o_ready):  # only move if accepted
1357                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1358
1359             # instruction started: must wait till it finishes
1360             with m.State("INSN_ACTIVE"):
1361                 # note changes to MSR, PC and SVSTATE
1362                 # XXX oops, really must monitor *all* State Regfile write
1363                 # ports looking for changes!
1364                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1365                     sync += self.sv_changed.eq(1)
1366                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1367                     sync += self.msr_changed.eq(1)
1368                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1369                     sync += self.pc_changed.eq(1)
1370                 with m.If(~core_busy_o):  # instruction done!
1371                     comb += exec_pc_o_valid.eq(1)
1372                     with m.If(exec_pc_i_ready):
1373                         # when finished, indicate "done".
1374                         # however, if there was an exception, the instruction
1375                         # is *not* yet done.  this is an implementation
1376                         # detail: we choose to implement exceptions by
1377                         # taking the exception information from the LDST
1378                         # unit, putting that *back* into the PowerDecoder2,
1379                         # and *re-running the entire instruction*.
1380                         # if we erroneously indicate "done" here, it is as if
1381                         # there were *TWO* instructions:
1382                         # 1) the failed LDST 2) a TRAP.
1383                         with m.If(~pdecode2.ldst_exc.happened &
1384                                    ~pdecode2.instr_fault):
1385                             comb += self.insn_done.eq(1)
1386                         m.next = "INSN_START"  # back to fetch
1387
1388     def elaborate(self, platform):
1389         m = super().elaborate(platform)
1390         # convenience
1391         comb, sync = m.d.comb, m.d.sync
1392         cur_state = self.cur_state
1393         pdecode2 = self.pdecode2
1394         dbg = self.dbg
1395         core = self.core
1396
1397         # set up peripherals and core
1398         core_rst = self.core_rst
1399
1400         # indicate to outside world if any FU is still executing
1401         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1402
1403         # address of the next instruction, in the absence of a branch
1404         # depends on the instruction size
1405         nia = Signal(64)
1406
1407         # connect up debug signals
1408         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1409
1410         # pass the prefix mode from Fetch to Issue, so the latter can loop
1411         # on VL==0
1412         is_svp64_mode = Signal()
1413
1414         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1415         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1416         # these are the handshake signals between each
1417
1418         # fetch FSM can run as soon as the PC is valid
1419         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1420         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1421
1422         # fetch FSM hands over the instruction to be decoded / issued
1423         fetch_insn_o_valid = Signal()
1424         fetch_insn_i_ready = Signal()
1425
1426         # predicate fetch FSM decodes and fetches the predicate
1427         pred_insn_i_valid = Signal()
1428         pred_insn_o_ready = Signal()
1429
1430         # predicate fetch FSM delivers the masks
1431         pred_mask_o_valid = Signal()
1432         pred_mask_i_ready = Signal()
1433
1434         # issue FSM delivers the instruction to the be executed
1435         exec_insn_i_valid = Signal()
1436         exec_insn_o_ready = Signal()
1437
1438         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1439         exec_pc_o_valid = Signal()
1440         exec_pc_i_ready = Signal()
1441
1442         # the FSMs here are perhaps unusual in that they detect conditions
1443         # then "hold" information, combinatorially, for the core
1444         # (as opposed to using sync - which would be on a clock's delay)
1445         # this includes the actual opcode, valid flags and so on.
1446
1447         # Fetch, then predicate fetch, then Issue, then Execute.
1448         # Issue is where the VL for-loop # lives.  the ready/valid
1449         # signalling is used to communicate between the four.
1450
1451         # set up Fetch FSM
1452         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1453                          self.imem, core_rst, pdecode2, cur_state,
1454                          dbg, core,
1455                          dbg.state.svstate, # combinatorially same
1456                          nia, is_svp64_mode)
1457         m.submodules.fetch = fetch
1458         # connect up in/out data to existing Signals
1459         comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
1460         comb += fetch.p.i_data.msr.eq(dbg.state.msr) # combinatorially same
1461         # and the ready/valid signalling
1462         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1463         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1464         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1465         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1466
1467         self.issue_fsm(m, core, nia,
1468                        dbg, core_rst, is_svp64_mode,
1469                        fetch_pc_o_ready, fetch_pc_i_valid,
1470                        fetch_insn_o_valid, fetch_insn_i_ready,
1471                        pred_insn_i_valid, pred_insn_o_ready,
1472                        pred_mask_o_valid, pred_mask_i_ready,
1473                        exec_insn_i_valid, exec_insn_o_ready,
1474                        exec_pc_o_valid, exec_pc_i_ready)
1475
1476         if self.svp64_en:
1477             self.fetch_predicate_fsm(m,
1478                                      pred_insn_i_valid, pred_insn_o_ready,
1479                                      pred_mask_o_valid, pred_mask_i_ready)
1480
1481         self.execute_fsm(m, core,
1482                          exec_insn_i_valid, exec_insn_o_ready,
1483                          exec_pc_o_valid, exec_pc_i_ready)
1484
1485         return m
1486
1487
1488 class TestIssuer(Elaboratable):
1489     def __init__(self, pspec):
1490         self.ti = TestIssuerInternal(pspec)
1491         # XXX TODO: make this a command-line selectable option from pspec
1492         #from soc.simple.inorder import TestIssuerInternalInOrder
1493         #self.ti = TestIssuerInternalInOrder(pspec)
1494         self.pll = DummyPLL(instance=True)
1495
1496         self.dbg_rst_i = Signal(reset_less=True)
1497
1498         # PLL direct clock or not
1499         self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1500         if self.pll_en:
1501             self.pll_test_o = Signal(reset_less=True)
1502             self.pll_vco_o = Signal(reset_less=True)
1503             self.clk_sel_i = Signal(2, reset_less=True)
1504             self.ref_clk = ClockSignal()  # can't rename it but that's ok
1505             self.pllclk_clk = ClockSignal("pllclk")
1506
1507     def elaborate(self, platform):
1508         m = Module()
1509         comb = m.d.comb
1510
1511         # TestIssuer nominally runs at main clock, actually it is
1512         # all combinatorial internally except for coresync'd components
1513         m.submodules.ti = ti = self.ti
1514
1515         if self.pll_en:
1516             # ClockSelect runs at PLL output internal clock rate
1517             m.submodules.wrappll = pll = self.pll
1518
1519             # add clock domains from PLL
1520             cd_pll = ClockDomain("pllclk")
1521             m.domains += cd_pll
1522
1523             # PLL clock established.  has the side-effect of running clklsel
1524             # at the PLL's speed (see DomainRenamer("pllclk") above)
1525             pllclk = self.pllclk_clk
1526             comb += pllclk.eq(pll.clk_pll_o)
1527
1528             # wire up external 24mhz to PLL
1529             #comb += pll.clk_24_i.eq(self.ref_clk)
1530             # output 18 mhz PLL test signal, and analog oscillator out
1531             comb += self.pll_test_o.eq(pll.pll_test_o)
1532             comb += self.pll_vco_o.eq(pll.pll_vco_o)
1533
1534             # input to pll clock selection
1535             comb += pll.clk_sel_i.eq(self.clk_sel_i)
1536
1537             # now wire up ResetSignals.  don't mind them being in this domain
1538             pll_rst = ResetSignal("pllclk")
1539             comb += pll_rst.eq(ResetSignal())
1540
1541         # internal clock is set to selector clock-out.  has the side-effect of
1542         # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1543         # debug clock runs at coresync internal clock
1544         if self.ti.dbg_domain != 'sync':
1545             cd_dbgsync = ClockDomain("dbgsync")
1546         intclk = ClockSignal(self.ti.core_domain)
1547         dbgclk = ClockSignal(self.ti.dbg_domain)
1548         # XXX BYPASS PLL XXX
1549         # XXX BYPASS PLL XXX
1550         # XXX BYPASS PLL XXX
1551         if self.pll_en:
1552             comb += intclk.eq(self.ref_clk)
1553             assert self.ti.core_domain != 'sync', \
1554                 "cannot set core_domain to sync and use pll at the same time"
1555         else:
1556             if self.ti.core_domain != 'sync':
1557                 comb += intclk.eq(ClockSignal())
1558         if self.ti.dbg_domain != 'sync':
1559             dbgclk = ClockSignal(self.ti.dbg_domain)
1560             comb += dbgclk.eq(intclk)
1561         comb += self.ti.dbg_rst_i.eq(self.dbg_rst_i)
1562
1563         return m
1564
1565     def ports(self):
1566         return list(self.ti.ports()) + list(self.pll.ports()) + \
1567             [ClockSignal(), ResetSignal()]
1568
1569     def external_ports(self):
1570         ports = self.ti.external_ports()
1571         ports.append(ClockSignal())
1572         ports.append(ResetSignal())
1573         if self.pll_en:
1574             ports.append(self.clk_sel_i)
1575             ports.append(self.pll.clk_24_i)
1576             ports.append(self.pll_test_o)
1577             ports.append(self.pll_vco_o)
1578             ports.append(self.pllclk_clk)
1579             ports.append(self.ref_clk)
1580         return ports
1581
1582
1583 if __name__ == '__main__':
1584     units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1585              'spr': 1,
1586              'div': 1,
1587              'mul': 1,
1588              'shiftrot': 1
1589              }
1590     pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1591                          imem_ifacetype='bare_wb',
1592                          addr_wid=48,
1593                          mask_wid=8,
1594                          reg_wid=64,
1595                          units=units)
1596     dut = TestIssuer(pspec)
1597     vl = main(dut, ports=dut.ports(), name="test_issuer")
1598
1599     if len(sys.argv) == 1:
1600         vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1601         with open("test_issuer.il", "w") as f:
1602             f.write(vl)