src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs, MSR)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the {insert state variable here}
  70     res_ok_delay = Signal(name="%s_ok_delay" % name)
  71     with m.If(~core_rst):
  72         sync += res_ok_delay.eq(~state_i.ok)
  73         with m.If(state_i.ok):
  74             # incoming override (start from pc_i)
  75             comb += res.eq(state_i.data)
  76         with m.Else():
  77             # otherwise read StateRegs regfile for {insert state here}...
  78             comb += regfile.ren.eq(1 << regnum)
  79         # ... but on a 1-clock delay
  80         with m.If(res_ok_delay):
  81             comb += res.eq(regfile.o_data)
  82
  83
  84 def get_predint(m, mask, name):
  85     """decode SVP64 predicate integer mask field to reg number and invert
  86     this is identical to the equivalent function in ISACaller except that
  87     it doesn't read the INT directly, it just decodes "what needs to be done"
  88     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  89
  90     * all1s is set to indicate that no mask is to be applied.
  91     * regread indicates the GPR register number to be read
  92     * invert is set to indicate that the register value is to be inverted
  93     * unary indicates that the contents of the register is to be shifted 1<<r3
  94     """
  95     comb = m.d.comb
  96     regread = Signal(5, name=name+"regread")
  97     invert = Signal(name=name+"invert")
  98     unary = Signal(name=name+"unary")
  99     all1s = Signal(name=name+"all1s")
 100     with m.Switch(mask):
 101         with m.Case(SVP64PredInt.ALWAYS.value):
 102             comb += all1s.eq(1)      # use 0b1111 (all ones)
 103         with m.Case(SVP64PredInt.R3_UNARY.value):
 104             comb += regread.eq(3)
 105             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 106         with m.Case(SVP64PredInt.R3.value):
 107             comb += regread.eq(3)
 108         with m.Case(SVP64PredInt.R3_N.value):
 109             comb += regread.eq(3)
 110             comb += invert.eq(1)
 111         with m.Case(SVP64PredInt.R10.value):
 112             comb += regread.eq(10)
 113         with m.Case(SVP64PredInt.R10_N.value):
 114             comb += regread.eq(10)
 115             comb += invert.eq(1)
 116         with m.Case(SVP64PredInt.R30.value):
 117             comb += regread.eq(30)
 118         with m.Case(SVP64PredInt.R30_N.value):
 119             comb += regread.eq(30)
 120             comb += invert.eq(1)
 121     return regread, invert, unary, all1s
 122
 123
 124 def get_predcr(m, mask, name):
 125     """decode SVP64 predicate CR to reg number field and invert status
 126     this is identical to _get_predcr in ISACaller
 127     """
 128     comb = m.d.comb
 129     idx = Signal(2, name=name+"idx")
 130     invert = Signal(name=name+"crinvert")
 131     with m.Switch(mask):
 132         with m.Case(SVP64PredCR.LT.value):
 133             comb += idx.eq(CR.LT)
 134             comb += invert.eq(0)
 135         with m.Case(SVP64PredCR.GE.value):
 136             comb += idx.eq(CR.LT)
 137             comb += invert.eq(1)
 138         with m.Case(SVP64PredCR.GT.value):
 139             comb += idx.eq(CR.GT)
 140             comb += invert.eq(0)
 141         with m.Case(SVP64PredCR.LE.value):
 142             comb += idx.eq(CR.GT)
 143             comb += invert.eq(1)
 144         with m.Case(SVP64PredCR.EQ.value):
 145             comb += idx.eq(CR.EQ)
 146             comb += invert.eq(0)
 147         with m.Case(SVP64PredCR.NE.value):
 148             comb += idx.eq(CR.EQ)
 149             comb += invert.eq(1)
 150         with m.Case(SVP64PredCR.SO.value):
 151             comb += idx.eq(CR.SO)
 152             comb += invert.eq(0)
 153         with m.Case(SVP64PredCR.NS.value):
 154             comb += idx.eq(CR.SO)
 155             comb += invert.eq(1)
 156     return idx, invert
 157
 158
 159 class TestIssuerBase(Elaboratable):
 160     """TestIssuerBase - common base class for Issuers
 161
 162     takes care of power-on reset, peripherals, debug, DEC/TB,
 163     and gets PC/MSR/SVSTATE from the State Regfile etc.
 164     """
 165
 166     def __init__(self, pspec):
 167
 168         # test if microwatt compatibility is to be enabled
 169         self.microwatt_compat = (hasattr(pspec, "microwatt_compat") and
 170                                  (pspec.microwatt_compat == True))
 171         self.alt_reset = Signal(reset_less=True) # not connected yet (microwatt)
 172         # test if fabric compatibility is to be enabled
 173         self.fabric_compat = (hasattr(pspec, "fabric_compat") and
 174                                  (pspec.fabric_compat == True))
 175
 176         if self.microwatt_compat or self.fabric_compat:
 177
 178             if hasattr(pspec, "microwatt_old"):
 179                 self.microwatt_old = pspec.microwatt_old
 180             else:
 181                 self.microwatt_old = True # PLEASE DO NOT ALTER THIS
 182
 183             if hasattr(pspec, "microwatt_debug"):
 184                 self.microwatt_debug = pspec.microwatt_debug
 185             else:
 186                 self.microwatt_debug = True # set to False when using an FPGA
 187
 188         # test is SVP64 is to be enabled
 189         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 190
 191         # and if regfiles are reduced
 192         self.regreduce_en = (hasattr(pspec, "regreduce") and
 193                              (pspec.regreduce == True))
 194
 195         # and if overlap requested
 196         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 197                               (pspec.allow_overlap == True))
 198
 199         # and get the core domain
 200         self.core_domain = "coresync"
 201         if (hasattr(pspec, "core_domain") and
 202             isinstance(pspec.core_domain, str)):
 203             self.core_domain = pspec.core_domain
 204
 205         # JTAG interface.  add this right at the start because if it's
 206         # added it *modifies* the pspec, by adding enable/disable signals
 207         # for parts of the rest of the core
 208         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 209         #self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 210         self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 211         if self.jtag_en:
 212             # XXX MUST keep this up-to-date with fabric, and
 213             # soc-cocotb-sim, and err.. all needs sorting out, argh
 214             subset = ['uart',
 215                       'mtwi',
 216                       'eint', 'gpio', 'mspi0',
 217                       # 'mspi1', - disabled for now
 218                       # 'pwm', 'sd0', - disabled for now
 219                       'sdr']
 220             self.jtag = JTAG(get_pinspecs(subset=subset),
 221                              domain=self.dbg_domain)
 222             # add signals to pspec to enable/disable icache and dcache
 223             # (or data and intstruction wishbone if icache/dcache not included)
 224             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 225             # TODO: do we actually care if these are not domain-synchronised?
 226             # honestly probably not.
 227             pspec.wb_icache_en = self.jtag.wb_icache_en
 228             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 229             self.wb_sram_en = self.jtag.wb_sram_en
 230         else:
 231             self.wb_sram_en = Const(1)
 232
 233         # add 4k sram blocks?
 234         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 235                          pspec.sram4x4kblock == True)
 236         if self.sram4x4k:
 237             self.sram4k = []
 238             for i in range(4):
 239                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 240                                                     # features={'err'}
 241                                                     ))
 242
 243         # add interrupt controller?
 244         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 245         if self.xics:
 246             self.xics_icp = XICS_ICP()
 247             self.xics_ics = XICS_ICS()
 248             self.int_level_i = self.xics_ics.int_level_i
 249         else:
 250             self.ext_irq = Signal()
 251
 252         # add GPIO peripheral?
 253         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 254         if self.gpio:
 255             self.simple_gpio = SimpleGPIO()
 256             self.gpio_o = self.simple_gpio.gpio_o
 257
 258         # main instruction core.  suitable for prototyping / demo only
 259         self.core = core = NonProductionCore(pspec)
 260         self.core_rst = ResetSignal(self.core_domain)
 261
 262         # instruction decoder.  goes into Trap Record
 263         #pdecode = create_pdecode()
 264         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 265         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 266                                      opkls=IssuerDecode2ToOperand,
 267                                      svp64_en=self.svp64_en,
 268                                      regreduce_en=self.regreduce_en)
 269         pdecode = self.pdecode2.dec
 270
 271         if self.svp64_en:
 272             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 273
 274         self.update_svstate = Signal()  # set this if updating svstate
 275         self.new_svstate = new_svstate = SVSTATERec("new_svstate")
 276
 277         # Test Instruction memory
 278         if hasattr(core, "icache"):
 279             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 280             # truly dreadful.  needs a huge reorg.
 281             pspec.icache = core.icache
 282         self.imem = ConfigFetchUnit(pspec).fu
 283
 284         # DMI interface
 285         self.dbg = CoreDebug()
 286         self.dbg_rst_i = Signal(reset_less=True)
 287
 288         # instruction go/monitor
 289         self.pc_o = Signal(64, reset_less=True)
 290         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 291         self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
 292         self.svstate_i = Data(64, "svstate_i")  # ditto
 293         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 294         self.busy_o = Signal(reset_less=True)
 295         self.memerr_o = Signal(reset_less=True)
 296
 297         # STATE regfile read /write ports for PC, MSR, SVSTATE
 298         staterf = self.core.regs.rf['state']
 299         self.state_r_msr = staterf.r_ports['msr']  # MSR rd
 300         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 301         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 302
 303         self.state_w_msr = staterf.w_ports['d_wr2']  # MSR wr
 304         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 305         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 306
 307         # DMI interface access
 308         intrf = self.core.regs.rf['int']
 309         fastrf = self.core.regs.rf['fast']
 310         crrf = self.core.regs.rf['cr']
 311         xerrf = self.core.regs.rf['xer']
 312         self.int_r = intrf.r_ports['dmi']  # INT DMI read
 313         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR DMI read
 314         self.xer_r = xerrf.r_ports['full_xer']  # XER DMI read
 315         self.fast_r = fastrf.r_ports['dmi']  # FAST DMI read
 316
 317         if self.svp64_en:
 318             # for predication
 319             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 320             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 321
 322         # hack method of keeping an eye on whether branch/trap set the PC
 323         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 324         self.state_nia.wen.name = 'state_nia_wen'
 325         # and whether SPR pipeline sets DEC or TB (fu/spr/main_stage.py)
 326         self.state_spr = self.core.regs.rf['state'].w_ports['state1']
 327
 328         # pulse to synchronize the simulator at instruction end
 329         self.insn_done = Signal()
 330
 331         # indicate any instruction still outstanding, in execution
 332         self.any_busy = Signal()
 333
 334         if self.svp64_en:
 335             # store copies of predicate masks
 336             self.srcmask = Signal(64)
 337             self.dstmask = Signal(64)
 338
 339         # sigh, the wishbone addresses are not wishbone-compliant
 340         # in old versions of microwatt, tplaten_3d_game is a new one
 341         if self.microwatt_compat or self.fabric_compat:
 342             self.ibus_adr = Signal(32, name='wishbone_insn_out.adr')
 343             self.dbus_adr = Signal(32, name='wishbone_data_out.adr')
 344
 345         # add an output of the PC and instruction, and whether it was requested
 346         # this is for verilator debug purposes
 347         if self.microwatt_compat or self.fabric_compat:
 348             self.nia = Signal(64)
 349             self.msr_o = Signal(64)
 350             self.nia_req = Signal(1)
 351             self.insn = Signal(32)
 352             self.ldst_req = Signal(1)
 353             self.ldst_addr = Signal(1)
 354
 355         # for pausing dec/tb during an SPR pipeline event, this
 356         # ensures that an SPR write (mtspr) to TB or DEC does not
 357         # get overwritten by the DEC/TB FSM
 358         self.pause_dec_tb = Signal()
 359
 360     def setup_peripherals(self, m):
 361         comb, sync = m.d.comb, m.d.sync
 362
 363         # okaaaay so the debug module must be in coresync clock domain
 364         # but NOT its reset signal. to cope with this, set every single
 365         # submodule explicitly in coresync domain, debug and JTAG
 366         # in their own one but using *external* reset.
 367         csd = DomainRenamer(self.core_domain)
 368         dbd = DomainRenamer(self.dbg_domain)
 369
 370         if self.microwatt_compat or self.fabric_compat:
 371             m.submodules.core = core = self.core
 372         else:
 373             m.submodules.core = core = csd(self.core)
 374
 375         # this _so_ needs sorting out.  ICache is added down inside
 376         # LoadStore1 and is already a submodule of LoadStore1
 377         if not isinstance(self.imem, ICache):
 378             m.submodules.imem = imem = csd(self.imem)
 379
 380         # set up JTAG Debug Module (in correct domain)
 381         m.submodules.dbg = dbg = dbd(self.dbg)
 382         if self.jtag_en:
 383             m.submodules.jtag = jtag = dbd(self.jtag)
 384             # TODO: UART2GDB mux, here, from external pin
 385             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
 386             sync += dbg.dmi.connect_to(jtag.dmi)
 387
 388         # fixup the clocks in microwatt-compat mode (but leave resets alone
 389         # so that microwatt soc.vhdl can pull a reset on the core or DMI
 390         # can do it, just like in TestIssuer)
 391         if self.microwatt_compat or self.fabric_compat:
 392             intclk = ClockSignal(self.core_domain)
 393             dbgclk = ClockSignal(self.dbg_domain)
 394             if self.core_domain != 'sync':
 395                 comb += intclk.eq(ClockSignal())
 396             if self.dbg_domain != 'sync':
 397                 comb += dbgclk.eq(ClockSignal())
 398
 399         # if using old version of microwatt
 400         # drop the first 3 bits of the incoming wishbone addresses
 401         if self.microwatt_compat or self.fabric_compat:
 402             ibus = self.imem.ibus
 403             dbus = self.core.l0.cmpi.wb_bus()
 404             if self.microwatt_old:
 405                 comb += self.ibus_adr.eq(Cat(Const(0, 3), ibus.adr))
 406                 comb += self.dbus_adr.eq(Cat(Const(0, 3), dbus.adr))
 407             else:
 408                 comb += self.ibus_adr.eq(ibus.adr)
 409                 comb += self.dbus_adr.eq(dbus.adr)
 410             if self.microwatt_debug:
 411                 # microwatt verilator debug purposes
 412                 pi = self.core.l0.cmpi.pi.pi
 413                 comb += self.ldst_req.eq(pi.addr_ok_o)
 414                 comb += self.ldst_addr.eq(pi.addr)
 415
 416         cur_state = self.cur_state
 417
 418         # 4x 4k SRAM blocks.  these simply "exist", they get routed in fabric
 419         if self.sram4x4k:
 420             for i, sram in enumerate(self.sram4k):
 421                 m.submodules["sram4k_%d" % i] = csd(sram)
 422                 comb += sram.enable.eq(self.wb_sram_en)
 423
 424         # XICS interrupt handler
 425         if self.xics:
 426             m.submodules.xics_icp = icp = csd(self.xics_icp)
 427             m.submodules.xics_ics = ics = csd(self.xics_ics)
 428             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
 429             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
 430         else:
 431             sync += cur_state.eint.eq(self.ext_irq)  # connect externally
 432
 433         # GPIO test peripheral
 434         if self.gpio:
 435             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
 436
 437         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
 438         # XXX causes fabric ECP5 test to get wrong idea about input and output
 439         # (but works with verilator sim *sigh*)
 440         # if self.gpio and self.xics:
 441         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
 442
 443         # instruction decoder
 444         pdecode = create_pdecode()
 445         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
 446         if self.svp64_en:
 447             m.submodules.svp64 = svp64 = csd(self.svp64)
 448
 449         # clock delay power-on reset
 450         cd_por = ClockDomain(reset_less=True)
 451         cd_sync = ClockDomain()
 452         m.domains += cd_por, cd_sync
 453         core_sync = ClockDomain(self.core_domain)
 454         if self.core_domain != "sync":
 455             m.domains += core_sync
 456         if self.dbg_domain != "sync":
 457             dbg_sync = ClockDomain(self.dbg_domain)
 458             m.domains += dbg_sync
 459
 460         # create a delay, but remember it is in the power-on-reset clock domain!
 461         ti_rst = Signal(reset_less=True)
 462         delay = Signal(range(4), reset=3)
 463         stop_delay = Signal(range(16), reset=5)
 464         with m.If(delay != 0):
 465             m.d.por += delay.eq(delay - 1) # decrement... in POR domain!
 466         with m.If(stop_delay != 0):
 467             m.d.por += stop_delay.eq(stop_delay - 1) # likewise
 468         comb += cd_por.clk.eq(ClockSignal())
 469
 470         # power-on reset delay
 471         core_rst = ResetSignal(self.core_domain)
 472         if self.core_domain != "sync":
 473             comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
 474             comb += core_rst.eq(ti_rst)
 475         else:
 476             with m.If(delay != 0 | dbg.core_rst_o):
 477                 comb += core_rst.eq(1)
 478         with m.If(stop_delay != 0):
 479             # run DMI core-stop as well but on an extra couple of cycles
 480             comb += dbg.core_stopped_i.eq(1)
 481
 482         # connect external reset signal to DMI Reset
 483         if self.dbg_domain != "sync":
 484             dbg_rst = ResetSignal(self.dbg_domain)
 485             comb += dbg_rst.eq(self.dbg_rst_i)
 486
 487         # busy/halted signals from core
 488         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
 489         comb += self.busy_o.eq(core_busy_o)
 490         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
 491
 492         # temporary hack: says "go" immediately for both address gen and ST
 493         # XXX: st.go_i is set to 1 cycle delay to reduce combinatorial chains
 494         l0 = core.l0
 495         ldst = core.fus.fus['ldst0']
 496         st_go_edge = rising_edge(m, ldst.st.rel_o)
 497         # link addr-go direct to rel
 498         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
 499         m.d.sync += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
 500
 501     def do_dmi(self, m, dbg):
 502         """deals with DMI debug requests
 503
 504         currently only provides read requests for the INT regfile, CR and XER
 505         it will later also deal with *writing* to these regfiles.
 506         """
 507         comb = m.d.comb
 508         sync = m.d.sync
 509         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
 510         d_fast = dbg.d_fast
 511         intrf = self.core.regs.rf['int']
 512         fastrf = self.core.regs.rf['fast']
 513
 514         with m.If(d_reg.req):  # request for regfile access being made
 515             # TODO: error-check this
 516             # XXX should this be combinatorial?  sync better?
 517             if intrf.unary:
 518                 comb += self.int_r.ren.eq(1 << d_reg.addr)
 519             else:
 520                 comb += self.int_r.addr.eq(d_reg.addr)
 521                 comb += self.int_r.ren.eq(1)
 522         d_reg_delay = Signal()
 523         sync += d_reg_delay.eq(d_reg.req)
 524         with m.If(d_reg_delay):
 525             # data arrives one clock later
 526             comb += d_reg.data.eq(self.int_r.o_data)
 527             comb += d_reg.ack.eq(1)
 528
 529         # fast regfile
 530         with m.If(d_fast.req):  # request for regfile access being made
 531             if fastrf.unary:
 532                 comb += self.fast_r.ren.eq(1 << d_fast.addr)
 533             else:
 534                 comb += self.fast_r.addr.eq(d_fast.addr)
 535                 comb += self.fast_r.ren.eq(1)
 536         d_fast_delay = Signal()
 537         sync += d_fast_delay.eq(d_fast.req)
 538         with m.If(d_fast_delay):
 539             # data arrives one clock later
 540             comb += d_fast.data.eq(self.fast_r.o_data)
 541             comb += d_fast.ack.eq(1)
 542
 543         # sigh same thing for CR debug
 544         with m.If(d_cr.req):  # request for regfile access being made
 545             comb += self.cr_r.ren.eq(0b11111111)  # enable all
 546         d_cr_delay = Signal()
 547         sync += d_cr_delay.eq(d_cr.req)
 548         with m.If(d_cr_delay):
 549             # data arrives one clock later
 550             comb += d_cr.data.eq(self.cr_r.o_data)
 551             comb += d_cr.ack.eq(1)
 552
 553         # aaand XER...
 554         with m.If(d_xer.req):  # request for regfile access being made
 555             comb += self.xer_r.ren.eq(0b111111)  # enable all
 556         d_xer_delay = Signal()
 557         sync += d_xer_delay.eq(d_xer.req)
 558         with m.If(d_xer_delay):
 559             # data arrives one clock later
 560             comb += d_xer.data.eq(self.xer_r.o_data)
 561             comb += d_xer.ack.eq(1)
 562
 563     def tb_dec_fsm(self, m, spr_dec):
 564         """tb_dec_fsm
 565
 566         this is a FSM for updating either dec or tb.  it runs alternately
 567         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
 568         value to DEC, however the regfile has "passthrough" on it so this
 569         *should* be ok.
 570
 571         see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
 572         """
 573
 574         comb, sync = m.d.comb, m.d.sync
 575         state_rf = self.core.regs.rf['state']
 576         state_r_dectb = state_rf.r_ports['issue']  # DEC/TB
 577         state_w_dectb = state_rf.w_ports['issue']  # DEC/TB
 578
 579
 580         with m.FSM() as fsm:
 581
 582             # initiates read of current DEC
 583             with m.State("DEC_READ"):
 584                 comb += state_r_dectb.ren.eq(1<<StateRegs.DEC)
 585                 with m.If(~self.pause_dec_tb):
 586                     m.next = "DEC_WRITE"
 587
 588             # waits for DEC read to arrive (1 cycle), updates with new value
 589             # respects if dec/tb writing has been paused
 590             with m.State("DEC_WRITE"):
 591                 with m.If(self.pause_dec_tb):
 592                     # if paused, return to reading
 593                     m.next = "DEC_READ"
 594                 with m.Else():
 595                     new_dec = Signal(64)
 596                     # TODO: MSR.LPCR 32-bit decrement mode
 597                     comb += new_dec.eq(state_r_dectb.o_data - 1)
 598                     comb += state_w_dectb.wen.eq(1<<StateRegs.DEC)
 599                     comb += state_w_dectb.i_data.eq(new_dec)
 600                     # copy to cur_state for decoder, for an interrupt
 601                     sync += spr_dec.eq(new_dec)
 602                     m.next = "TB_READ"
 603
 604             # initiates read of current TB
 605             with m.State("TB_READ"):
 606                 comb += state_r_dectb.ren.eq(1<<StateRegs.TB)
 607                 with m.If(~self.pause_dec_tb):
 608                     m.next = "TB_WRITE"
 609
 610             # waits for read TB to arrive, initiates write of current TB
 611             # respects if dec/tb writing has been paused
 612             with m.State("TB_WRITE"):
 613                 with m.If(self.pause_dec_tb):
 614                     # if paused, return to reading
 615                     m.next = "TB_READ"
 616                 with m.Else():
 617                     new_tb = Signal(64)
 618                     comb += new_tb.eq(state_r_dectb.o_data + 1)
 619                     comb += state_w_dectb.wen.eq(1<<StateRegs.TB)
 620                     comb += state_w_dectb.i_data.eq(new_tb)
 621                     m.next = "DEC_READ"
 622
 623         return m
 624
 625     def elaborate(self, platform):
 626         m = Module()
 627         # convenience
 628         comb, sync = m.d.comb, m.d.sync
 629         cur_state = self.cur_state
 630         pdecode2 = self.pdecode2
 631         dbg = self.dbg
 632
 633         # set up peripherals and core
 634         core_rst = self.core_rst
 635         self.setup_peripherals(m)
 636
 637         # reset current state if core reset requested
 638         with m.If(core_rst):
 639             m.d.sync += self.cur_state.eq(0)
 640             # and, sigh, set configured values, which are also done in regfile
 641             # XXX ??? what the hell is the shift for??
 642             m.d.sync += self.cur_state.pc.eq(self.core.pc_at_reset)
 643             m.d.sync += self.cur_state.msr.eq(self.core.msr_at_reset)
 644
 645         # check halted condition: requested PC to execute matches DMI stop addr
 646         # and immediately stop. address of 0xffff_ffff_ffff_ffff can never
 647         # match
 648         halted = Signal()
 649         comb += halted.eq(dbg.stop_addr_o == dbg.state.pc)
 650         with m.If(halted):
 651             comb += dbg.core_stopped_i.eq(1)
 652             comb += dbg.terminate_i.eq(1)
 653
 654         # PC and instruction from I-Memory
 655         comb += self.pc_o.eq(cur_state.pc)
 656         self.pc_changed = Signal()  # note write to PC
 657         self.msr_changed = Signal()  # note write to MSR
 658         self.sv_changed = Signal()  # note write to SVSTATE
 659
 660         # read state either from incoming override or from regfile
 661         state = CoreState("get")  # current state (MSR/PC/SVSTATE)
 662         state_get(m, state.msr, core_rst, self.msr_i,
 663                        "msr",                  # read MSR
 664                        self.state_r_msr, StateRegs.MSR)
 665         state_get(m, state.pc, core_rst, self.pc_i,
 666                        "pc",                  # read PC
 667                        self.state_r_pc, StateRegs.PC)
 668         state_get(m, state.svstate, core_rst, self.svstate_i,
 669                             "svstate",   # read SVSTATE
 670                             self.state_r_sv, StateRegs.SVSTATE)
 671
 672         # don't write pc every cycle
 673         comb += self.state_w_pc.wen.eq(0)
 674         comb += self.state_w_pc.i_data.eq(0)
 675
 676         # connect up debug state.  note "combinatorially same" below,
 677         # this is a bit naff, passing state over in the dbg class, but
 678         # because it is combinatorial it achieves the desired goal
 679         comb += dbg.state.eq(state)
 680
 681         # this bit doesn't have to be in the FSM: connect up to read
 682         # regfiles on demand from DMI
 683         self.do_dmi(m, dbg)
 684
 685         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
 686         # (which uses that in PowerDecoder2 to raise 0x900 exception)
 687         self.tb_dec_fsm(m, cur_state.dec)
 688
 689         # while stopped, allow updating the MSR, PC and SVSTATE.
 690         # these are mainly for debugging purposes (including DMI/JTAG)
 691         with m.If(dbg.core_stopped_i):
 692             with m.If(self.pc_i.ok):
 693                 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 694                 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 695                 sync += self.pc_changed.eq(1)
 696             with m.If(self.msr_i.ok):
 697                 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
 698                 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
 699                 sync += self.msr_changed.eq(1)
 700             with m.If(self.svstate_i.ok | self.update_svstate):
 701                 with m.If(self.svstate_i.ok): # over-ride from external source
 702                     comb += self.new_svstate.eq(self.svstate_i.data)
 703                 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 704                 comb += self.state_w_sv.i_data.eq(self.new_svstate)
 705                 sync += self.sv_changed.eq(1)
 706
 707         # start renaming some of the ports to match microwatt
 708         if self.microwatt_compat or self.fabric_compat:
 709             self.core.o.core_terminate_o.name = "terminated_out"
 710             # names of DMI interface
 711             self.dbg.dmi.addr_i.name = 'dmi_addr'
 712             self.dbg.dmi.din.name    = 'dmi_din'
 713             self.dbg.dmi.dout.name   = 'dmi_dout'
 714             self.dbg.dmi.req_i.name  = 'dmi_req'
 715             self.dbg.dmi.we_i.name   = 'dmi_wr'
 716             self.dbg.dmi.ack_o.name  = 'dmi_ack'
 717             # wishbone instruction bus
 718             ibus = self.imem.ibus
 719             if self.microwatt_compat:
 720                 ibus.adr.name = 'wishbone_insn_out.adr'
 721                 ibus.dat_w.name = 'wishbone_insn_out.dat'
 722                 ibus.sel.name = 'wishbone_insn_out.sel'
 723                 ibus.cyc.name = 'wishbone_insn_out.cyc'
 724                 ibus.stb.name = 'wishbone_insn_out.stb'
 725                 ibus.we.name = 'wishbone_insn_out.we'
 726                 ibus.dat_r.name = 'wishbone_insn_in.dat'
 727                 ibus.ack.name = 'wishbone_insn_in.ack'
 728                 ibus.stall.name = 'wishbone_insn_in.stall'
 729             # wishbone data bus
 730             dbus = self.core.l0.cmpi.wb_bus()
 731             if self.microwatt_compat:
 732                 dbus.adr.name = 'wishbone_data_out.adr'
 733                 dbus.dat_w.name = 'wishbone_data_out.dat'
 734                 dbus.sel.name = 'wishbone_data_out.sel'
 735                 dbus.cyc.name = 'wishbone_data_out.cyc'
 736                 dbus.stb.name = 'wishbone_data_out.stb'
 737                 dbus.we.name = 'wishbone_data_out.we'
 738                 dbus.dat_r.name = 'wishbone_data_in.dat'
 739                 dbus.ack.name = 'wishbone_data_in.ack'
 740                 dbus.stall.name = 'wishbone_data_in.stall'
 741
 742         return m
 743
 744     def __iter__(self):
 745         yield from self.pc_i.ports()
 746         yield from self.msr_i.ports()
 747         yield self.pc_o
 748         yield self.memerr_o
 749         yield from self.core.ports()
 750         yield from self.imem.ports()
 751         yield self.core_bigendian_i
 752         yield self.busy_o
 753
 754     def ports(self):
 755         return list(self)
 756
 757     def external_ports(self):
 758         if self.microwatt_compat or self.fabric_compat:
 759             if self.fabric_compat:
 760                 ports = [self.core.o.core_terminate_o,
 761                          self.alt_reset, # not connected yet
 762                          self.nia, self.insn, self.nia_req, self.msr_o,
 763                          self.ldst_req, self.ldst_addr,
 764                          ClockSignal(),
 765                          ResetSignal(),
 766                         ]
 767             else:
 768                 ports = [self.core.o.core_terminate_o,
 769                          self.ext_irq,
 770                          self.alt_reset, # not connected yet
 771                          self.nia, self.insn, self.nia_req, self.msr_o,
 772                          self.ldst_req, self.ldst_addr,
 773                          ClockSignal(),
 774                          ResetSignal(),
 775                         ]
 776             ports += list(self.dbg.dmi.ports())
 777             # for dbus/ibus microwatt, exclude err btw and cti
 778             for name, sig in self.imem.ibus.fields.items():
 779                 if name not in ['err', 'bte', 'cti', 'adr']:
 780                     ports.append(sig)
 781             for name, sig in self.core.l0.cmpi.wb_bus().fields.items():
 782                 if name not in ['err', 'bte', 'cti', 'adr']:
 783                     ports.append(sig)
 784             # microwatt non-compliant with wishbone
 785             ports.append(self.ibus_adr)
 786             ports.append(self.dbus_adr)
 787
 788             if self.microwatt_compat:
 789                 # Ignore the remaining ports in microwatt compat mode
 790                 return ports
 791
 792         ports = self.pc_i.ports()
 793         ports = self.msr_i.ports()
 794         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
 795                   ]
 796
 797         if self.jtag_en:
 798             ports += list(self.jtag.external_ports())
 799         else:
 800             # don't add DMI if JTAG is enabled
 801             ports += list(self.dbg.dmi.ports())
 802
 803         ports += list(self.imem.ibus.fields.values())
 804         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
 805
 806         if self.sram4x4k:
 807             for sram in self.sram4k:
 808                 ports += list(sram.bus.fields.values())
 809
 810         if self.xics:
 811             ports += list(self.xics_icp.bus.fields.values())
 812             ports += list(self.xics_ics.bus.fields.values())
 813             ports.append(self.int_level_i)
 814         else:
 815             ports.append(self.ext_irq)
 816
 817         if self.gpio:
 818             ports += list(self.simple_gpio.bus.fields.values())
 819             ports.append(self.gpio_o)
 820
 821         return ports
 822
 823     def ports(self):
 824         return list(self)
 825
 826
 827 class TestIssuerInternal(TestIssuerBase):
 828     """TestIssuer - reads instructions from TestMemory and issues them
 829
 830     efficiency and speed is not the main goal here: functional correctness
 831     and code clarity is.  optimisations (which almost 100% interfere with
 832     easy understanding) come later.
 833     """
 834
    def fetch_fsm(self, m, dbg, core, core_rst, nia, is_svp64_mode,
                        fetch_pc_o_ready, fetch_pc_i_valid,
                        fetch_insn_o_valid, fetch_insn_i_ready):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: PRE_IDLE -> IDLE -> INSN_READ [-> INSN_READ2] -> INSN_READY
        and then back to IDLE.

        arguments:

        * m: Module to which the FSM and comb/sync statements are added
        * dbg: debug interface (core_stopped_i and core_stop_o are read)
        * core: only inspected for the presence of an "icache" attribute
        * core_rst: while set, the FSM is held in PRE_IDLE
        * nia: set (sync) to the current PC plus the instruction size
        * is_svp64_mode: set (sync) to whether an SVP64 prefix was detected
        * fetch_pc_o_ready: raised in IDLE when a fetch may be accepted
        * fetch_pc_i_valid: incoming request to start a fetch
        * fetch_insn_o_valid: raised in INSN_READY, instruction available
        * fetch_insn_i_ready: incoming acknowledge, returns FSM to IDLE

        self.svp64_en, self.microwatt_compat and self.fabric_compat are
        python-level (elaboration-time) options: they select which
        hardware gets generated, they are not signals.
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
        pc, msr, svstate = cur_state.pc, cur_state.msr, cur_state.svstate

        # also note instruction fetch failed
        # (without an icache, fetch is treated as never failing)
        # NOTE(review): flush_needed and svstate are assigned but not
        # used anywhere in this function
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        # set priv / virt mode on I-Cache, sigh
        if isinstance(self.imem, ICache):
            comb += self.imem.i_in.priv_mode.eq(~msr[MSR.PR])
            comb += self.imem.i_in.virt_mode.eq(msr[MSR.IR]) # Instr. Redir (VM)

        with m.FSM(name='fetch_fsm'):

            # allow fetch to not run at startup due to I-Cache reset not
            # having time to settle.  power-on-reset holds dbg.core_stopped_i
            with m.State("PRE_IDLE"):
                with m.If(~dbg.core_stopped_i & ~dbg.core_stop_o & ~core_rst):
                    m.next = "IDLE"

            # waiting (zzz)
            with m.State("IDLE"):
                # fetch allowed if not failed and stopped but not stepping
                # (see dmi.py for how core_stop_o is generated)
                with m.If(~fetch_failed & ~dbg.core_stop_o):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~pdecode2.instr_fault
                          & ~dbg.core_stop_o):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # when using "single-step" mode, checking dbg.stopping_o
                # prevents progress.  allow fetch to proceed once started
                # (stopping is a constant zero here, so the "jump back to
                # idle" branch below is never taken)
                stopping = Const(0)
                #if self.allow_overlap:
                #    stopping = dbg.stopping_o
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    with m.If(self.imem.f_busy_o &
                              ~pdecode2.instr_fault):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        # (keep presenting the same request to memory)
                        comb += self.imem.a_pc_i.eq(pc)
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        if hasattr(core, "icache"):
                            # blech, icache returns actual instruction
                            insn = self.imem.f_instr_o
                        else:
                            # but these return raw memory
                            # (presumably get_insn picks out the 32-bit
                            # word addressed by the PC - see get_insn)
                            insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            # NOTE(review): pdecode2.is_svp64_mode is loaded
                            # from the *registered* is_svp64_mode, which is
                            # itself only updated on this same clock edge:
                            # confirm the one-instruction lag is intended
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            # (8 bytes when prefixed, otherwise 4)
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_i.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_i.eq(insn)
                            if self.microwatt_compat or self.fabric_compat:
                                # for verilator debug purposes
                                comb += self.insn.eq(insn)
                                comb += self.nia.eq(cur_state.pc)
                                comb += self.msr_o.eq(cur_state.msr)
                                comb += self.nia_req.eq(1)
                            m.next = "INSN_READY"

            # second 32-bit read: the instruction following an SVP64 prefix
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    # NOTE(review): unlike INSN_READ, a_pc_i is not
                    # re-driven (with pc+4) while waiting here
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    if hasattr(core, "icache"):
                        # blech, icache returns actual instruction
                        insn = self.imem.f_instr_o
                    else:
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet: the string below is only a
                    # sketch of the intended logic, it generates nothing)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                # (hold valid until the receiving side signals ready)
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"
 988
 989
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled

        arguments:

        * m: Module to which the FSM and comb/sync statements are added
        * pred_insn_i_valid: incoming request to start fetching masks
        * pred_insn_o_ready: raised while idle (FETCH_PRED_IDLE)
        * pred_mask_o_valid: raised in FETCH_PRED_DONE, masks are valid
        * pred_mask_i_ready: incoming acknowledge, returns FSM to idle

        results are stored (sync) in self.srcmask and self.dstmask.
        depending on predmode the masks come from integer registers
        (INT), are built up one bit per CR field (CR), or are set to
        all 1s (any other mode).
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # INT form: register to read, invert flag, unary (1<<reg) flag,
        # and "all 1s" flag (no register read needed)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        # CR form: index of the bit within each CR field, and invert flag
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (start from empty masks: bits get ORed in)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # no predication: all elements enabled.  the
                        # masks are stored directly, bypassing the
                        # FETCH_PRED_SHIFT_MASK state
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                # (int_pred.o_data is used here, the state after the one
                # in which the read was requested)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register select the bit)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    # (clear the loop state, ready for the next use)
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # this check is outside the If/Else above: data requested
                # on the final loop iteration is therefore still
                # processed during the "exit" cycle
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                # (bit 0 of each stored mask then corresponds to the
                # current srcstep / dststep)
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are valid: hold until the receiver is ready
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
1158
1159     def issue_fsm(self, m, core, nia,
1160                   dbg, core_rst, is_svp64_mode,
1161                   fetch_pc_o_ready, fetch_pc_i_valid,
1162                   fetch_insn_o_valid, fetch_insn_i_ready,
1163                   pred_insn_i_valid, pred_insn_o_ready,
1164                   pred_mask_o_valid, pred_mask_i_ready,
1165                   exec_insn_i_valid, exec_insn_o_ready,
1166                   exec_pc_o_valid, exec_pc_i_ready):
1167         """issue FSM
1168
1169         decode / issue FSM.  this interacts with the "fetch" FSM
1170         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
1171         (outgoing). also interacts with the "execute" FSM
1172         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
1173         (incoming).
1174         SVP64 RM prefixes have already been set up by the
1175         "fetch" phase, so execute is fairly straightforward.
1176         """
1177
1178         comb = m.d.comb
1179         sync = m.d.sync
1180         pdecode2 = self.pdecode2
1181         cur_state = self.cur_state
1182         new_svstate = self.new_svstate
1183
1184         # temporaries
1185         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
1186
1187         # for updating svstate (things like srcstep etc.)
1188         comb += new_svstate.eq(cur_state.svstate)
1189
1190         # precalculate srcstep+1 and dststep+1
1191         cur_srcstep = cur_state.svstate.srcstep
1192         cur_dststep = cur_state.svstate.dststep
1193         next_srcstep = Signal.like(cur_srcstep)
1194         next_dststep = Signal.like(cur_dststep)
1195         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
1196         comb += next_dststep.eq(cur_state.svstate.dststep+1)
1197
1198         # note if an exception happened.  in a pipelined or OoO design
1199         # this needs to be accompanied by "shadowing" (or stalling)
1200         exc_happened = self.core.o.exc_happened
1201         # also note instruction fetch failed
1202         if hasattr(core, "icache"):
1203             fetch_failed = core.icache.i_out.fetch_failed
1204             flush_needed = True
1205             # set to fault in decoder
1206             # update (highest priority) instruction fault
1207             rising_fetch_failed = rising_edge(m, fetch_failed)
1208             with m.If(rising_fetch_failed):
1209                 sync += pdecode2.instr_fault.eq(1)
1210         else:
1211             fetch_failed = Const(0, 1)
1212             flush_needed = False
1213
1214         sync += fetch_pc_i_valid.eq(0)
1215
1216         with m.FSM(name="issue_fsm"):
1217
1218             with m.State("PRE_IDLE"):
1219                 with m.If(~dbg.core_stop_o & ~core_rst):
1220                     m.next = "ISSUE_START"
1221
1222             # sync with the "fetch" phase which is reading the instruction
1223             # at this point, there is no instruction running, that
1224             # could inadvertently update the PC.
1225             with m.State("ISSUE_START"):
1226                 # reset instruction fault
1227                 sync += pdecode2.instr_fault.eq(0)
1228                 # wait on "core stop" release, before next fetch
1229                 # need to do this here, in case we are in a VL==0 loop
1230                 with m.If(~dbg.core_stop_o & ~core_rst):
1231                     sync += fetch_pc_i_valid.eq(1)  # tell fetch to start
1232                     sync += cur_state.pc.eq(dbg.state.pc)
1233                     sync += cur_state.svstate.eq(dbg.state.svstate)
1234                     sync += cur_state.msr.eq(dbg.state.msr)
1235                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
1236                         m.next = "INSN_WAIT"
1237                 with m.Else():
1238                     # tell core it's stopped, and acknowledge debug handshake
1239                     comb += dbg.core_stopped_i.eq(1)
1240                     # while stopped, allow updating SVSTATE
1241                     with m.If(self.svstate_i.ok):
1242                         comb += new_svstate.eq(self.svstate_i.data)
1243                         comb += self.update_svstate.eq(1)
1244                         sync += self.sv_changed.eq(1)
1245
1246             # wait for an instruction to arrive from Fetch
1247             with m.State("INSN_WAIT"):
1248                 # when using "single-step" mode, checking dbg.stopping_o
1249                 # prevents progress.  allow issue to proceed once started
1250                 stopping = Const(0)
1251                 #if self.allow_overlap:
1252                 #    stopping = dbg.stopping_o
1253                 with m.If(stopping):
1254                     # stopping: jump back to idle
1255                     m.next = "ISSUE_START"
1256                     if flush_needed:
1257                         # request the icache to stop asserting "failed"
1258                         comb += core.icache.flush_in.eq(1)
1259                     # stop instruction fault
1260                     sync += pdecode2.instr_fault.eq(0)
1261                 with m.Else():
1262                     comb += fetch_insn_i_ready.eq(1)
1263                     with m.If(fetch_insn_o_valid):
1264                         # loop into ISSUE_START if it's a SVP64 instruction
1265                         # and VL == 0.  this because VL==0 is a for-loop
1266                         # from 0 to 0 i.e. always, always a NOP.
1267                         cur_vl = cur_state.svstate.vl
1268                         with m.If(is_svp64_mode & (cur_vl == 0)):
1269                             # update the PC before fetching the next instruction
1270                             # since we are in a VL==0 loop, no instruction was
1271                             # executed that we could be overwriting
1272                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1273                             comb += self.state_w_pc.i_data.eq(nia)
1274                             comb += self.insn_done.eq(1)
1275                             m.next = "ISSUE_START"
1276                         with m.Else():
1277                             if self.svp64_en:
1278                                 m.next = "PRED_START"  # fetching predicate
1279                             else:
1280                                 m.next = "DECODE_SV"  # skip predication
1281
1282             with m.State("PRED_START"):
1283                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
1284                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
1285                     m.next = "MASK_WAIT"
1286
1287             with m.State("MASK_WAIT"):
1288                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
1289                 with m.If(pred_mask_o_valid):  # predication masks are ready
1290                     m.next = "PRED_SKIP"
1291
1292             # skip zeros in predicate
1293             with m.State("PRED_SKIP"):
1294                 with m.If(~is_svp64_mode):
1295                     m.next = "DECODE_SV"  # nothing to do
1296                 with m.Else():
1297                     if self.svp64_en:
1298                         pred_src_zero = pdecode2.rm_dec.pred_sz
1299                         pred_dst_zero = pdecode2.rm_dec.pred_dz
1300
1301                         # new srcstep, after skipping zeros
1302                         skip_srcstep = Signal.like(cur_srcstep)
1303                         # value to be added to the current srcstep
1304                         src_delta = Signal.like(cur_srcstep)
1305                         # add leading zeros to srcstep, if not in zero mode
1306                         with m.If(~pred_src_zero):
1307                             # priority encoder (count leading zeros)
1308                             # append guard bit, in case the mask is all zeros
1309                             pri_enc_src = PriorityEncoder(65)
1310                             m.submodules.pri_enc_src = pri_enc_src
1311                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
1312                                                          Const(1, 1)))
1313                             comb += src_delta.eq(pri_enc_src.o)
1314                         # apply delta to srcstep
1315                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
1316                         # shift-out all leading zeros from the mask
1317                         # plus the leading "one" bit
1318                         # TODO count leading zeros and shift-out the zero
1319                         #      bits, in the same step, in hardware
1320                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1321
1322                         # same as above, but for dststep
1323                         skip_dststep = Signal.like(cur_dststep)
1324                         dst_delta = Signal.like(cur_dststep)
1325                         with m.If(~pred_dst_zero):
1326                             pri_enc_dst = PriorityEncoder(65)
1327                             m.submodules.pri_enc_dst = pri_enc_dst
1328                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1329                                                          Const(1, 1)))
1330                             comb += dst_delta.eq(pri_enc_dst.o)
1331                         comb += skip_dststep.eq(cur_dststep + dst_delta)
1332                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1333
1334                         # TODO: initialize mask[VL]=1 to avoid passing past VL
1335                         with m.If((skip_srcstep >= cur_vl) |
1336                                   (skip_dststep >= cur_vl)):
1337                             # end of VL loop. Update PC and reset src/dst step
1338                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1339                             comb += self.state_w_pc.i_data.eq(nia)
1340                             comb += new_svstate.srcstep.eq(0)
1341                             comb += new_svstate.dststep.eq(0)
1342                             comb += self.update_svstate.eq(1)
1343                             # synchronize with the simulator
1344                             comb += self.insn_done.eq(1)
1345                             # go back to Issue
1346                             m.next = "ISSUE_START"
1347                         with m.Else():
1348                             # update new src/dst step
1349                             comb += new_svstate.srcstep.eq(skip_srcstep)
1350                             comb += new_svstate.dststep.eq(skip_dststep)
1351                             comb += self.update_svstate.eq(1)
1352                             # proceed to Decode
1353                             m.next = "DECODE_SV"
1354
1355                         # pass predicate mask bits through to satellite decoders
1356                         # TODO: for SIMD this will be *multiple* bits
1357                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1358                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1359
1360             # after src/dst step have been updated, we are ready
1361             # to decode the instruction
1362             with m.State("DECODE_SV"):
1363                 # decode the instruction
1364                 with m.If(~fetch_failed):
1365                     sync += pdecode2.instr_fault.eq(0)
1366                 sync += core.i.e.eq(pdecode2.e)
1367                 sync += core.i.state.eq(cur_state)
1368                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1369                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1370                 if self.svp64_en:
1371                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1372                     # set RA_OR_ZERO detection in satellite decoders
1373                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1374                     # and svp64 detection
1375                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1376                     # and svp64 bit-rev'd ldst mode
1377                     ldst_dec = pdecode2.use_svp64_ldst_dec
1378                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1379                 # after decoding, reset any previous exception condition,
1380                 # allowing it to be set again during the next execution
1381                 sync += pdecode2.ldst_exc.eq(0)
1382
1383                 m.next = "INSN_EXECUTE"  # move to "execute"
1384
1385             # handshake with execution FSM, move to "wait" once acknowledged
1386             with m.State("INSN_EXECUTE"):
1387                 # when using "single-step" mode, checking dbg.stopping_o
1388                 # prevents progress.  allow execute to proceed once started
1389                 stopping = Const(0)
1390                 #if self.allow_overlap:
1391                 #    stopping = dbg.stopping_o
1392                 with m.If(stopping):
1393                     # stopping: jump back to idle
1394                     m.next = "ISSUE_START"
1395                     if flush_needed:
1396                         # request the icache to stop asserting "failed"
1397                         comb += core.icache.flush_in.eq(1)
1398                     # stop instruction fault
1399                     sync += pdecode2.instr_fault.eq(0)
1400                 with m.Else():
1401                     comb += exec_insn_i_valid.eq(1)  # trigger execute
1402                     with m.If(exec_insn_o_ready):   # execute acknowledged us
1403                         m.next = "EXECUTE_WAIT"
1404
1405             with m.State("EXECUTE_WAIT"):
1406                 comb += exec_pc_i_ready.eq(1)
1407                 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1408                 # the exception info needs to be blatted into
1409                 # pdecode.ldst_exc, and the instruction "re-run".
1410                 # when ldst_exc.happened is set, the PowerDecoder2
1411                 # reacts very differently: it re-writes the instruction
1412                 # with a "trap" (calls PowerDecoder2.trap()) which
1413                 # will *overwrite* whatever was requested and jump the
1414                 # PC to the exception address, as well as alter MSR.
1415                 # nothing else needs to be done other than to note
1416                 # the change of PC and MSR (and, later, SVSTATE)
1417                 with m.If(exc_happened):
1418                     mmu = core.fus.get_exc("mmu0")
1419                     ldst = core.fus.get_exc("ldst0")
1420                     if mmu is not None:
1421                         with m.If(fetch_failed):
1422                             # instruction fetch: exception is from MMU
1423                             # reset instr_fault (highest priority)
1424                             sync += pdecode2.ldst_exc.eq(mmu)
1425                             sync += pdecode2.instr_fault.eq(0)
1426                             if flush_needed:
1427                                 # request icache to stop asserting "failed"
1428                                 comb += core.icache.flush_in.eq(1)
1429                     with m.If(~fetch_failed):
1430                         # otherwise assume it was a LDST exception
1431                         sync += pdecode2.ldst_exc.eq(ldst)
1432
1433                 with m.If(exec_pc_o_valid):
1434
1435                     # was this the last loop iteration?
1436                     is_last = Signal()
1437                     cur_vl = cur_state.svstate.vl
1438                     comb += is_last.eq(next_srcstep == cur_vl)
1439
1440                     with m.If(pdecode2.instr_fault):
1441                         # reset instruction fault, try again
1442                         sync += pdecode2.instr_fault.eq(0)
1443                         m.next = "ISSUE_START"
1444
1445                     # return directly to Decode if Execute generated an
1446                     # exception.
1447                     with m.Elif(pdecode2.ldst_exc.happened):
1448                         m.next = "DECODE_SV"
1449
1450                     # if MSR, PC or SVSTATE were changed by the previous
1451                     # instruction, go directly back to Fetch, without
1452                     # updating either MSR PC or SVSTATE
1453                     with m.Elif(self.msr_changed | self.pc_changed |
1454                                 self.sv_changed):
1455                         m.next = "ISSUE_START"
1456
1457                     # also return to Fetch, when no output was a vector
1458                     # (regardless of SRCSTEP and VL), or when the last
1459                     # instruction was really the last one of the VL loop
1460                     with m.Elif((~pdecode2.loop_continue) | is_last):
1461                         # before going back to fetch, update the PC state
1462                         # register with the NIA.
1463                         # ok here we are not reading the branch unit.
1464                         # TODO: this just blithely overwrites whatever
1465                         #       pipeline updated the PC
1466                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1467                         comb += self.state_w_pc.i_data.eq(nia)
1468                         # reset SRCSTEP before returning to Fetch
1469                         if self.svp64_en:
1470                             with m.If(pdecode2.loop_continue):
1471                                 comb += new_svstate.srcstep.eq(0)
1472                                 comb += new_svstate.dststep.eq(0)
1473                                 comb += self.update_svstate.eq(1)
1474                         else:
1475                             comb += new_svstate.srcstep.eq(0)
1476                             comb += new_svstate.dststep.eq(0)
1477                             comb += self.update_svstate.eq(1)
1478                         m.next = "ISSUE_START"
1479
1480                     # returning to Execute? then, first update SRCSTEP
1481                     with m.Else():
1482                         comb += new_svstate.srcstep.eq(next_srcstep)
1483                         comb += new_svstate.dststep.eq(next_dststep)
1484                         comb += self.update_svstate.eq(1)
1485                         # return to mask skip loop
1486                         m.next = "PRED_SKIP"
1487
1488
1489         # check if svstate needs updating: if so, write it to State Regfile
1490         with m.If(self.update_svstate):
1491             sync += cur_state.svstate.eq(self.new_svstate)  # for next clock
1492
1493     def execute_fsm(self, m, core,
1494                     exec_insn_i_valid, exec_insn_o_ready,
1495                     exec_pc_o_valid, exec_pc_i_ready):
1496         """execute FSM
1497
1498         execute FSM. this interacts with the "issue" FSM
1499         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1500         (outgoing). SVP64 RM prefixes have already been set up by the
1501         "issue" phase, so execute is fairly straightforward.
1502         """
1503
1504         comb = m.d.comb
1505         sync = m.d.sync
1506         dbg = self.dbg
1507         pdecode2 = self.pdecode2
1508         cur_state = self.cur_state
1509
1510         # temporaries
1511         core_busy_o = core.n.o_data.busy_o  # core is busy
1512         core_ivalid_i = core.p.i_valid              # instruction is valid
1513
1514         if hasattr(core, "icache"):
1515             fetch_failed = core.icache.i_out.fetch_failed
1516         else:
1517             fetch_failed = Const(0, 1)
1518
1519         with m.FSM(name="exec_fsm"):
1520
1521             # waiting for instruction bus (stays there until not busy)
1522             with m.State("INSN_START"):
1523                 comb += exec_insn_o_ready.eq(1)
1524                 with m.If(exec_insn_i_valid):
1525                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1526                     sync += self.sv_changed.eq(0)
1527                     sync += self.pc_changed.eq(0)
1528                     sync += self.msr_changed.eq(0)
1529                     with m.If(core.p.o_ready):  # only move if accepted
1530                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1531
1532             # instruction started: must wait till it finishes
1533             with m.State("INSN_ACTIVE"):
1534                 # note changes to MSR, PC and SVSTATE
1535                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1536                     sync += self.sv_changed.eq(1)
1537                 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1538                     sync += self.msr_changed.eq(1)
1539                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1540                     sync += self.pc_changed.eq(1)
1541                 # and note changes to DEC/TB, to be passed to DEC/TB FSM
1542                 with m.If(self.state_spr.wen & (1 << StateRegs.TB)):
1543                     comb += self.pause_dec_tb.eq(1)
1544                 # but also zero-out the cur_state DEC so that, on
1545                 # the next instruction, if it is "enable interrupt"
1546                 # the delay between the DEC/TB FSM reading and updating
1547                 # cur_state.dec doesn't trigger a spurious interrupt.
1548                 # the DEC/TB FSM will read the regfile and update to
1549                 # the correct value, so having cur_state.dec set to zero
1550                 # for a while is no big deal.
1551                 with m.If(self.state_spr.wen & (1 << StateRegs.DEC)):
1552                     comb += self.pause_dec_tb.eq(1)
1553                     sync += cur_state.dec.eq(0) # only needs top bit clear
1554                 with m.If(~core_busy_o):  # instruction done!
1555                     comb += exec_pc_o_valid.eq(1)
1556                     with m.If(exec_pc_i_ready):
1557                         # when finished, indicate "done".
1558                         # however, if there was an exception, the instruction
1559                         # is *not* yet done.  this is an implementation
1560                         # detail: we choose to implement exceptions by
1561                         # taking the exception information from the LDST
1562                         # unit, putting that *back* into the PowerDecoder2,
1563                         # and *re-running the entire instruction*.
1564                         # if we erroneously indicate "done" here, it is as if
1565                         # there were *TWO* instructions:
1566                         # 1) the failed LDST 2) a TRAP.
1567                         with m.If(~pdecode2.ldst_exc.happened &
1568                                    ~pdecode2.instr_fault):
1569                             comb += self.insn_done.eq(1)
1570                         m.next = "INSN_START"  # back to fetch
1571                 # terminate returns directly to INSN_START
1572                 with m.If(dbg.terminate_i):
1573                     # comb += self.insn_done.eq(1) - no because it's not
1574                     m.next = "INSN_START"  # back to fetch
1575
1576     def elaborate(self, platform):
1577         m = super().elaborate(platform)
1578         # convenience
1579         comb, sync = m.d.comb, m.d.sync
1580         cur_state = self.cur_state
1581         pdecode2 = self.pdecode2
1582         dbg = self.dbg
1583         core = self.core
1584
1585         # set up peripherals and core
1586         core_rst = self.core_rst
1587
1588         # indicate to outside world if any FU is still executing
1589         comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing
1590
1591         # address of the next instruction, in the absence of a branch
1592         # depends on the instruction size
1593         nia = Signal(64)
1594
1595         # connect up debug signals
1596         with m.If(core.o.core_terminate_o):
1597             comb += dbg.terminate_i.eq(1)
1598
1599         # pass the prefix mode from Fetch to Issue, so the latter can loop
1600         # on VL==0
1601         is_svp64_mode = Signal()
1602
1603         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1604         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1605         # these are the handshake signals between each
1606
1607         # fetch FSM can run as soon as the PC is valid
1608         fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
1609         fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
1610
1611         # fetch FSM hands over the instruction to be decoded / issued
1612         fetch_insn_o_valid = Signal()
1613         fetch_insn_i_ready = Signal()
1614
1615         # predicate fetch FSM decodes and fetches the predicate
1616         pred_insn_i_valid = Signal()
1617         pred_insn_o_ready = Signal()
1618
1619         # predicate fetch FSM delivers the masks
1620         pred_mask_o_valid = Signal()
1621         pred_mask_i_ready = Signal()
1622
1623         # issue FSM delivers the instruction to the be executed
1624         exec_insn_i_valid = Signal()
1625         exec_insn_o_ready = Signal()
1626
1627         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1628         exec_pc_o_valid = Signal()
1629         exec_pc_i_ready = Signal()
1630
1631         # the FSMs here are perhaps unusual in that they detect conditions
1632         # then "hold" information, combinatorially, for the core
1633         # (as opposed to using sync - which would be on a clock's delay)
1634         # this includes the actual opcode, valid flags and so on.
1635
1636         # Fetch, then predicate fetch, then Issue, then Execute.
1637         # Issue is where the VL for-loop # lives.  the ready/valid
1638         # signalling is used to communicate between the four.
1639
1640         self.fetch_fsm(m, dbg, core, core_rst, nia, is_svp64_mode,
1641                        fetch_pc_o_ready, fetch_pc_i_valid,
1642                        fetch_insn_o_valid, fetch_insn_i_ready)
1643
1644         self.issue_fsm(m, core, nia,
1645                        dbg, core_rst, is_svp64_mode,
1646                        fetch_pc_o_ready, fetch_pc_i_valid,
1647                        fetch_insn_o_valid, fetch_insn_i_ready,
1648                        pred_insn_i_valid, pred_insn_o_ready,
1649                        pred_mask_o_valid, pred_mask_i_ready,
1650                        exec_insn_i_valid, exec_insn_o_ready,
1651                        exec_pc_o_valid, exec_pc_i_ready)
1652
1653         if self.svp64_en:
1654             self.fetch_predicate_fsm(m,
1655                                      pred_insn_i_valid, pred_insn_o_ready,
1656                                      pred_mask_o_valid, pred_mask_i_ready)
1657
1658         self.execute_fsm(m, core,
1659                          exec_insn_i_valid, exec_insn_o_ready,
1660                          exec_pc_o_valid, exec_pc_i_ready)
1661
1662         # whatever was done above, over-ride it if core reset is held.
1663         # set NIA to pc_at_reset
1664         with m.If(core_rst):
1665             sync += nia.eq(self.core.pc_at_reset)
1666
1667         return m
1668
1669
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that adds clock/reset plumbing

    instantiates a TestIssuerInternal (self.ti) and a DummyPLL (self.pll).
    when pspec has a true "use_pll" attribute, extra PLL-related signals
    are created and the PLL is added as a submodule; otherwise the core
    and debug clock domains (when they are not 'sync') are simply driven
    from the main clock.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # debug reset input, passed straight through to self.ti
        self.dbg_rst_i = Signal(reset_less=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """wire up self.ti, the optional PLL, and the clock domains

        returns the Module.  raises AssertionError if the PLL is enabled
        while self.ti.core_domain is 'sync'.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established: the "pllclk" domain clock is driven
            # from the PLL output.
            # NOTE(review): older comments here referred to a
            # DomainRenamer("pllclk") "above"; none is visible in this
            # class - confirm whether that is still relevant.
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal (core) clock.  the debug clock, further down, is made
        # to follow it.
        intclk = ClockSignal(ti.core_domain)
        # XXX BYPASS PLL XXX
        # the core clock is taken straight from ref_clk, not from the
        # PLL output, even when the PLL is enabled
        if self.pll_en:
            # check first: with core_domain == 'sync' the statement below
            # would assign the default-domain clock to itself
            assert ti.core_domain != 'sync', \
                "cannot set core_domain to sync and use pll at the same time"
            comb += intclk.eq(self.ref_clk)
        elif ti.core_domain != 'sync':
            comb += intclk.eq(ClockSignal())
        # debug clock runs at coresync internal clock
        if ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(ti.dbg_domain)
            comb += dbgclk.eq(intclk)
        comb += ti.dbg_rst_i.eq(self.dbg_rst_i)

        return m

    def ports(self):
        """all ports of self.ti and self.pll, plus default clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """external ports of self.ti plus clock, reset and (if enabled) PLL

        a new list is returned: the sequence handed back by
        self.ti.external_ports() is copied, not extended in place.
        """
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1760
1761
if __name__ == '__main__':
    # one Function Unit of each kind, in this order
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)

    # bare wishbone for both load/store and instruction memory, 64-bit
    pspec_args = {
        'ldst_ifacetype': 'bare_wb',
        'imem_ifacetype': 'bare_wb',
        'addr_wid': 64,
        'mask_wid': 8,
        'reg_wid': 64,
        'units': units,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments at all: also dump RTLIL to a fixed file
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)