big split-out of common functions in TestIssuer to TestIssuerBase
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
56 def get_insn(f_instr_o, pc):
57 if f_instr_o.width == 32:
58 return f_instr_o
59 else:
60 # 64-bit: bit 2 of pc decides which word to select
61 return f_instr_o.word_select(pc[2], 32)
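
# Software-only equivalent (illustrative, not used by the hardware) of
# get_insn() above: bit 2 of the PC selects which 32-bit half of a 64-bit
# fetch word holds the instruction, mirroring word_select(pc[2], 32):
def _get_insn_sketch(fetch_word, pc, fetch_width=64):
    if fetch_width == 32:
        return fetch_word & 0xFFFFFFFF
    sel = (pc >> 2) & 1                       # bit 2 of the PC
    return (fetch_word >> (32 * sel)) & 0xFFFFFFFF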
62
63 # gets state input or reads from state regfile
64
65
66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
67 comb = m.d.comb
68 sync = m.d.sync
69 # read the {insert state variable here}
70 res_ok_delay = Signal(name="%s_ok_delay" % name)
71 with m.If(~core_rst):
72 sync += res_ok_delay.eq(~state_i.ok)
73 with m.If(state_i.ok):
74 # incoming override (start from pc_i)
75 comb += res.eq(state_i.data)
76 with m.Else():
77 # otherwise read StateRegs regfile for {insert state here}...
78 comb += regfile.ren.eq(1 << regnum)
79 # ... but on a 1-clock delay
80 with m.If(res_ok_delay):
81 comb += res.eq(regfile.o_data)
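
# Illustrative per-clock software model (not used by the hardware) of
# state_get() above: an incoming override is used in the same cycle,
# otherwise the regfile is read and its data taken one clock later.
# `cycles` is a hypothetical iterable of (ok, override, regfile_data):
def _state_get_model_sketch(cycles):
    prev_ok = True                    # res_ok_delay models ~ok, registered
    for ok, override, regfile_data in cycles:
        if ok:
            yield override            # override wins, combinatorially
        elif not prev_ok:
            yield regfile_data        # regfile data, one clock after request
        else:
            yield None                # no data yet on the first read cycle
        prev_ok = ok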
82
83
84 def get_predint(m, mask, name):
85 """decode SVP64 predicate integer mask field to reg number and invert
86 this is identical to the equivalent function in ISACaller except that
87 it doesn't read the INT directly, it just decodes "what needs to be done"
88 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
89
90 * all1s is set to indicate that no mask is to be applied.
91 * regread indicates the GPR register number to be read
92 * invert is set to indicate that the register value is to be inverted
93 * unary indicates that the mask is a single bit, 1 shifted by the register contents (1<<r3)
94 """
95 comb = m.d.comb
96 regread = Signal(5, name=name+"regread")
97 invert = Signal(name=name+"invert")
98 unary = Signal(name=name+"unary")
99 all1s = Signal(name=name+"all1s")
100 with m.Switch(mask):
101 with m.Case(SVP64PredInt.ALWAYS.value):
102 comb += all1s.eq(1) # use 0b1111 (all ones)
103 with m.Case(SVP64PredInt.R3_UNARY.value):
104 comb += regread.eq(3)
105 comb += unary.eq(1) # 1<<r3 - shift r3 (single bit)
106 with m.Case(SVP64PredInt.R3.value):
107 comb += regread.eq(3)
108 with m.Case(SVP64PredInt.R3_N.value):
109 comb += regread.eq(3)
110 comb += invert.eq(1)
111 with m.Case(SVP64PredInt.R10.value):
112 comb += regread.eq(10)
113 with m.Case(SVP64PredInt.R10_N.value):
114 comb += regread.eq(10)
115 comb += invert.eq(1)
116 with m.Case(SVP64PredInt.R30.value):
117 comb += regread.eq(30)
118 with m.Case(SVP64PredInt.R30_N.value):
119 comb += regread.eq(30)
120 comb += invert.eq(1)
121 return regread, invert, unary, all1s
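
# The same decode, written out as a lookup table for illustration only
# (not used by the hardware); values are (regread, invert, unary, all1s):
_PREDINT_DECODE_SKETCH = {
    SVP64PredInt.ALWAYS.value:   (0,  0, 0, 1),
    SVP64PredInt.R3_UNARY.value: (3,  0, 1, 0),
    SVP64PredInt.R3.value:       (3,  0, 0, 0),
    SVP64PredInt.R3_N.value:     (3,  1, 0, 0),
    SVP64PredInt.R10.value:      (10, 0, 0, 0),
    SVP64PredInt.R10_N.value:    (10, 1, 0, 0),
    SVP64PredInt.R30.value:      (30, 0, 0, 0),
    SVP64PredInt.R30_N.value:    (30, 1, 0, 0),
}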
122
123
124 def get_predcr(m, mask, name):
125 """decode SVP64 predicate CR to reg number field and invert status
126 this is identical to _get_predcr in ISACaller
127 """
128 comb = m.d.comb
129 idx = Signal(2, name=name+"idx")
130 invert = Signal(name=name+"crinvert")
131 with m.Switch(mask):
132 with m.Case(SVP64PredCR.LT.value):
133 comb += idx.eq(CR.LT)
134 comb += invert.eq(0)
135 with m.Case(SVP64PredCR.GE.value):
136 comb += idx.eq(CR.LT)
137 comb += invert.eq(1)
138 with m.Case(SVP64PredCR.GT.value):
139 comb += idx.eq(CR.GT)
140 comb += invert.eq(0)
141 with m.Case(SVP64PredCR.LE.value):
142 comb += idx.eq(CR.GT)
143 comb += invert.eq(1)
144 with m.Case(SVP64PredCR.EQ.value):
145 comb += idx.eq(CR.EQ)
146 comb += invert.eq(0)
147 with m.Case(SVP64PredCR.NE.value):
148 comb += idx.eq(CR.EQ)
149 comb += invert.eq(1)
150 with m.Case(SVP64PredCR.SO.value):
151 comb += idx.eq(CR.SO)
152 comb += invert.eq(0)
153 with m.Case(SVP64PredCR.NS.value):
154 comb += idx.eq(CR.SO)
155 comb += invert.eq(1)
156 return idx, invert
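
# Likewise for the CR predicate decode above, for illustration only
# (not used by the hardware); values are (CR bit index, invert):
_PREDCR_DECODE_SKETCH = {
    SVP64PredCR.LT.value: (CR.LT, 0), SVP64PredCR.GE.value: (CR.LT, 1),
    SVP64PredCR.GT.value: (CR.GT, 0), SVP64PredCR.LE.value: (CR.GT, 1),
    SVP64PredCR.EQ.value: (CR.EQ, 0), SVP64PredCR.NE.value: (CR.EQ, 1),
    SVP64PredCR.SO.value: (CR.SO, 0), SVP64PredCR.NS.value: (CR.SO, 1),
}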
157
158
159 class TestIssuerBase:
160 """TestIssuerBase - base class for Issuers
161 """
162
163 def __init__(self, pspec):
164
165 # test if SVP64 is to be enabled
166 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
167
168 # and if regfiles are reduced
169 self.regreduce_en = (hasattr(pspec, "regreduce") and
170 (pspec.regreduce == True))
171
172 # and if overlap requested
173 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
174 (pspec.allow_overlap == True))
175
176 # JTAG interface. add this right at the start because if it's
177 # added it *modifies* the pspec, by adding enable/disable signals
178 # for parts of the rest of the core
179 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
180 self.dbg_domain = "sync" # sigh "dbgsync" too problematic
181 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
182 if self.jtag_en:
183 # XXX MUST keep this up-to-date with litex, and
184 # soc-cocotb-sim, and err.. all needs sorting out, argh
185 subset = ['uart',
186 'mtwi',
187 'eint', 'gpio', 'mspi0',
188 # 'mspi1', - disabled for now
189 # 'pwm', 'sd0', - disabled for now
190 'sdr']
191 self.jtag = JTAG(get_pinspecs(subset=subset),
192 domain=self.dbg_domain)
193 # add signals to pspec to enable/disable icache and dcache
194 # (or data and instruction wishbone if icache/dcache not included)
195 # https://bugs.libre-soc.org/show_bug.cgi?id=520
196 # TODO: do we actually care if these are not domain-synchronised?
197 # honestly probably not.
198 pspec.wb_icache_en = self.jtag.wb_icache_en
199 pspec.wb_dcache_en = self.jtag.wb_dcache_en
200 self.wb_sram_en = self.jtag.wb_sram_en
201 else:
202 self.wb_sram_en = Const(1)
203
204 # add 4k sram blocks?
205 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
206 pspec.sram4x4kblock == True)
207 if self.sram4x4k:
208 self.sram4k = []
209 for i in range(4):
210 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
211 # features={'err'}
212 ))
213
214 # add interrupt controller?
215 self.xics = hasattr(pspec, "xics") and pspec.xics == True
216 if self.xics:
217 self.xics_icp = XICS_ICP()
218 self.xics_ics = XICS_ICS()
219 self.int_level_i = self.xics_ics.int_level_i
220
221 # add GPIO peripheral?
222 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
223 if self.gpio:
224 self.simple_gpio = SimpleGPIO()
225 self.gpio_o = self.simple_gpio.gpio_o
226
227 # main instruction core. suitable for prototyping / demo only
228 self.core = core = NonProductionCore(pspec)
229 self.core_rst = ResetSignal("coresync")
230
231 # instruction decoder. goes into Trap Record
232 #pdecode = create_pdecode()
233 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
234 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
235 opkls=IssuerDecode2ToOperand,
236 svp64_en=self.svp64_en,
237 regreduce_en=self.regreduce_en)
238 pdecode = self.pdecode2.dec
239
240 if self.svp64_en:
241 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
242
243 # Test Instruction memory
244 if hasattr(core, "icache"):
245 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
246 # truly dreadful. needs a huge reorg.
247 pspec.icache = core.icache
248 self.imem = ConfigFetchUnit(pspec).fu
249
250 # DMI interface
251 self.dbg = CoreDebug()
252
253 # instruction go/monitor
254 self.pc_o = Signal(64, reset_less=True)
255 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
256 self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
257 self.svstate_i = Data(64, "svstate_i") # ditto
258 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
259 self.busy_o = Signal(reset_less=True)
260 self.memerr_o = Signal(reset_less=True)
261
262 # STATE regfile read /write ports for PC, MSR, SVSTATE
263 staterf = self.core.regs.rf['state']
264 self.state_r_msr = staterf.r_ports['msr'] # MSR rd
265 self.state_r_pc = staterf.r_ports['cia'] # PC rd
266 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
267
268 self.state_w_msr = staterf.w_ports['msr'] # MSR wr
269 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
270 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
271
272 # DMI interface access
273 intrf = self.core.regs.rf['int']
274 crrf = self.core.regs.rf['cr']
275 xerrf = self.core.regs.rf['xer']
276 self.int_r = intrf.r_ports['dmi'] # INT read
277 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
278 self.xer_r = xerrf.r_ports['full_xer'] # XER read
279
280 if self.svp64_en:
281 # for predication
282 self.int_pred = intrf.r_ports['pred'] # INT predicate read
283 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
284
285 # hack method of keeping an eye on whether branch/trap set the PC
286 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
287 self.state_nia.wen.name = 'state_nia_wen'
288
289 # pulse to synchronize the simulator at instruction end
290 self.insn_done = Signal()
291
292 # indicate any instruction still outstanding, in execution
293 self.any_busy = Signal()
294
295 if self.svp64_en:
296 # store copies of predicate masks
297 self.srcmask = Signal(64)
298 self.dstmask = Signal(64)
299
300 def setup_peripherals(self, m):
301 comb, sync = m.d.comb, m.d.sync
302
303 # okaaaay so the debug module must be in coresync clock domain
304 # but NOT its reset signal. to cope with this, set every single
305 # submodule explicitly in coresync domain, debug and JTAG
306 # in their own one but using *external* reset.
307 csd = DomainRenamer("coresync")
308 dbd = DomainRenamer(self.dbg_domain)
309
310 m.submodules.core = core = csd(self.core)
311 # this _so_ needs sorting out. ICache is added down inside
312 # LoadStore1 and is already a submodule of LoadStore1
313 if not isinstance(self.imem, ICache):
314 m.submodules.imem = imem = csd(self.imem)
315 m.submodules.dbg = dbg = dbd(self.dbg)
316 if self.jtag_en:
317 m.submodules.jtag = jtag = dbd(self.jtag)
318 # TODO: UART2GDB mux, here, from external pin
319 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
320 sync += dbg.dmi.connect_to(jtag.dmi)
321
322 cur_state = self.cur_state
323
324 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
325 if self.sram4x4k:
326 for i, sram in enumerate(self.sram4k):
327 m.submodules["sram4k_%d" % i] = csd(sram)
328 comb += sram.enable.eq(self.wb_sram_en)
329
330 # XICS interrupt handler
331 if self.xics:
332 m.submodules.xics_icp = icp = csd(self.xics_icp)
333 m.submodules.xics_ics = ics = csd(self.xics_ics)
334 comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
335 sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
336
337 # GPIO test peripheral
338 if self.gpio:
339 m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
340
341 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
342 # XXX causes litex ECP5 test to get wrong idea about input and output
343 # (but works with verilator sim *sigh*)
344 # if self.gpio and self.xics:
345 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
346
347 # instruction decoder
348 pdecode = create_pdecode()
349 m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
350 if self.svp64_en:
351 m.submodules.svp64 = svp64 = csd(self.svp64)
352
353 # convenience
354 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
355 intrf = self.core.regs.rf['int']
356
357 # clock delay power-on reset
358 cd_por = ClockDomain(reset_less=True)
359 cd_sync = ClockDomain()
360 core_sync = ClockDomain("coresync")
361 m.domains += cd_por, cd_sync, core_sync
362 if self.dbg_domain != "sync":
363 dbg_sync = ClockDomain(self.dbg_domain)
364 m.domains += dbg_sync
365
366 ti_rst = Signal(reset_less=True)
367 delay = Signal(range(4), reset=3)
368 with m.If(delay != 0):
369 m.d.por += delay.eq(delay - 1)
370 comb += cd_por.clk.eq(ClockSignal())
371
372 # power-on reset delay
373 core_rst = ResetSignal("coresync")
374 comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
375 comb += core_rst.eq(ti_rst)
376
377 # debug clock is same as coresync, but reset is *main external*
378 if self.dbg_domain != "sync":
379 dbg_rst = ResetSignal(self.dbg_domain)
380 comb += dbg_rst.eq(ResetSignal())
381
382 # busy/halted signals from core
383 core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
384 comb += self.busy_o.eq(core_busy_o)
385 comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
386
387 # temporary hack: says "go" immediately for both address gen and ST
388 l0 = core.l0
389 ldst = core.fus.fus['ldst0']
390 st_go_edge = rising_edge(m, ldst.st.rel_o)
391 # link addr-go direct to rel
392 m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
393 m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
394
395 def do_dmi(self, m, dbg):
396 """deals with DMI debug requests
397
398 currently only provides read requests for the INT regfile, CR and XER
399 it will later also deal with *writing* to these regfiles.
400 """
401 comb = m.d.comb
402 sync = m.d.sync
403 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
404 intrf = self.core.regs.rf['int']
405
406 with m.If(d_reg.req): # request for regfile access being made
407 # TODO: error-check this
408 # XXX should this be combinatorial? sync better?
409 if intrf.unary:
410 comb += self.int_r.ren.eq(1 << d_reg.addr)
411 else:
412 comb += self.int_r.addr.eq(d_reg.addr)
413 comb += self.int_r.ren.eq(1)
414 d_reg_delay = Signal()
415 sync += d_reg_delay.eq(d_reg.req)
416 with m.If(d_reg_delay):
417 # data arrives one clock later
418 comb += d_reg.data.eq(self.int_r.o_data)
419 comb += d_reg.ack.eq(1)
420
421 # sigh same thing for CR debug
422 with m.If(d_cr.req): # request for regfile access being made
423 comb += self.cr_r.ren.eq(0b11111111) # enable all
424 d_cr_delay = Signal()
425 sync += d_cr_delay.eq(d_cr.req)
426 with m.If(d_cr_delay):
427 # data arrives one clock later
428 comb += d_cr.data.eq(self.cr_r.o_data)
429 comb += d_cr.ack.eq(1)
430
431 # aaand XER...
432 with m.If(d_xer.req): # request for regfile access being made
433 comb += self.xer_r.ren.eq(0b111111) # enable all
434 d_xer_delay = Signal()
435 sync += d_xer_delay.eq(d_xer.req)
436 with m.If(d_xer_delay):
437 # data arrives one clock later
438 comb += d_xer.data.eq(self.xer_r.o_data)
439 comb += d_xer.ack.eq(1)
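
    # Illustrative software model (not part of the hardware) of the DMI reads
    # above: assert the read-enable now, data and ack arrive one clock later.
    # `regs` is a hypothetical dict-backed regfile, `reqs` a per-cycle stream
    # of (req, addr) tuples; yields (data, ack) per cycle:
    @staticmethod
    def _dmi_read_model_sketch(regs, reqs):
        pending = None
        for req, addr in reqs:
            if pending is not None:
                yield regs[pending], 1    # one-cycle-delayed data plus ack
            else:
                yield 0, 0
            pending = addr if req else None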
440
441 def tb_dec_fsm(self, m, spr_dec):
442 """tb_dec_fsm
443
444 this is a FSM for updating either dec or tb. it runs alternately
445 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
446 value to DEC, however the regfile has "passthrough" on it so this
447 *should* be ok.
448
449 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
450 """
451
452 comb, sync = m.d.comb, m.d.sync
453 fast_rf = self.core.regs.rf['fast']
454 fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
455 fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
456
457 with m.FSM() as fsm:
458
459 # initiates read of current DEC
460 with m.State("DEC_READ"):
461 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
462 comb += fast_r_dectb.ren.eq(1)
463 m.next = "DEC_WRITE"
464
465 # waits for DEC read to arrive (1 cycle), updates with new value
466 with m.State("DEC_WRITE"):
467 new_dec = Signal(64)
468 # TODO: MSR.LPCR 32-bit decrement mode
469 comb += new_dec.eq(fast_r_dectb.o_data - 1)
470 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
471 comb += fast_w_dectb.wen.eq(1)
472 comb += fast_w_dectb.i_data.eq(new_dec)
473 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
474 m.next = "TB_READ"
475
476 # initiates read of current TB
477 with m.State("TB_READ"):
478 comb += fast_r_dectb.addr.eq(FastRegs.TB)
479 comb += fast_r_dectb.ren.eq(1)
480 m.next = "TB_WRITE"
481
482 # waits for read TB to arrive, initiates write of current TB
483 with m.State("TB_WRITE"):
484 new_tb = Signal(64)
485 comb += new_tb.eq(fast_r_dectb.o_data + 1)
486 comb += fast_w_dectb.addr.eq(FastRegs.TB)
487 comb += fast_w_dectb.wen.eq(1)
488 comb += fast_w_dectb.i_data.eq(new_tb)
489 m.next = "DEC_READ"
490
491 return m
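
    # Illustrative software model (not part of the hardware) of the FSM above:
    # DEC and TB are updated alternately, one register per read/write pair of
    # states. `fastregs` is a hypothetical dict standing in for the FAST
    # regfile; `updates` is the number of alternating updates to apply:
    @staticmethod
    def _dec_tb_model_sketch(fastregs, updates):
        mask = (1 << 64) - 1
        for i in range(updates):
            if i % 2 == 0:
                fastregs['DEC'] = (fastregs['DEC'] - 1) & mask   # decrement DEC
            else:
                fastregs['TB'] = (fastregs['TB'] + 1) & mask     # increment TB
        return fastregs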
492
493 def __iter__(self):
494 yield from self.pc_i.ports()
495 yield from self.msr_i.ports()
496 yield self.pc_o
497 yield self.memerr_o
498 yield from self.core.ports()
499 yield from self.imem.ports()
500 yield self.core_bigendian_i
501 yield self.busy_o
502
503 def ports(self):
504 return list(self)
505
506 def external_ports(self):
507 ports = self.pc_i.ports()
508 ports += self.msr_i.ports()
509 ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
510 ]
511
512 if self.jtag_en:
513 ports += list(self.jtag.external_ports())
514 else:
515 # JTAG disabled: expose the DMI interface directly
516 ports += list(self.dbg.dmi.ports())
517
518 ports += list(self.imem.ibus.fields.values())
519 ports += list(self.core.l0.cmpi.wb_bus().fields.values())
520
521 if self.sram4x4k:
522 for sram in self.sram4k:
523 ports += list(sram.bus.fields.values())
524
525 if self.xics:
526 ports += list(self.xics_icp.bus.fields.values())
527 ports += list(self.xics_ics.bus.fields.values())
528 ports.append(self.int_level_i)
529
530 if self.gpio:
531 ports += list(self.simple_gpio.bus.fields.values())
532 ports.append(self.gpio_o)
533
534 return ports
535
538
539
540
541 # Fetch Finite State Machine.
542 # WARNING: there are currently DriverConflicts but it's actually working.
543 # TODO, here: everything that is global in nature, information from the
544 # main TestIssuerInternal, needs to move to either ispec() or ospec().
545 # not only that: TestIssuerInternal.imem can entirely move into here
546 # because imem is only ever accessed inside the FetchFSM.
547 class FetchFSM(ControlBase):
548 def __init__(self, allow_overlap, svp64_en, imem, core_rst,
549 pdecode2, cur_state,
550 dbg, core, svstate, nia, is_svp64_mode):
551 self.allow_overlap = allow_overlap
552 self.svp64_en = svp64_en
553 self.imem = imem
554 self.core_rst = core_rst
555 self.pdecode2 = pdecode2
556 self.cur_state = cur_state
557 self.dbg = dbg
558 self.core = core
559 self.svstate = svstate
560 self.nia = nia
561 self.is_svp64_mode = is_svp64_mode
562
563 # set up pipeline ControlBase and allocate i/o specs
564 # (unusual: normally done by the Pipeline API)
565 super().__init__(stage=self)
566 self.p.i_data, self.n.o_data = self.new_specs(None)
567 self.i, self.o = self.p.i_data, self.n.o_data
568
569 # next 3 functions are Stage API Compliance
570 def setup(self, m, i):
571 pass
572
573 def ispec(self):
574 return FetchInput()
575
576 def ospec(self):
577 return FetchOutput()
578
579 def elaborate(self, platform):
580 """fetch FSM
581
582 this FSM performs fetch of raw instruction data, partial-decodes
583 it 32-bit at a time to detect SVP64 prefixes, and will optionally
584 read a 2nd 32-bit quantity if that occurs.
585 """
586 m = super().elaborate(platform)
587
588 dbg = self.dbg
589 core = self.core
590 pc = self.i.pc
591 msr = self.i.msr
592 svstate = self.svstate
593 nia = self.nia
594 is_svp64_mode = self.is_svp64_mode
595 fetch_pc_o_ready = self.p.o_ready
596 fetch_pc_i_valid = self.p.i_valid
597 fetch_insn_o_valid = self.n.o_valid
598 fetch_insn_i_ready = self.n.i_ready
599
600 comb = m.d.comb
601 sync = m.d.sync
602 pdecode2 = self.pdecode2
603 cur_state = self.cur_state
604 dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
605
606 # also note instruction fetch failed
607 if hasattr(core, "icache"):
608 fetch_failed = core.icache.i_out.fetch_failed
609 flush_needed = True
610 else:
611 fetch_failed = Const(0, 1)
612 flush_needed = False
613
614 with m.FSM(name='fetch_fsm'):
615
616 # waiting (zzz)
617 with m.State("IDLE"):
618 with m.If(~dbg.stopping_o & ~fetch_failed):
619 comb += fetch_pc_o_ready.eq(1)
620 with m.If(fetch_pc_i_valid & ~fetch_failed):
621 # instruction allowed to go: start by reading the PC
622 # capture the PC and also drop it into Insn Memory
623 # we have joined a pair of combinatorial memory
624 # lookups together. this is Generally Bad.
625 comb += self.imem.a_pc_i.eq(pc)
626 comb += self.imem.a_i_valid.eq(1)
627 comb += self.imem.f_i_valid.eq(1)
628 sync += cur_state.pc.eq(pc)
629 sync += cur_state.svstate.eq(svstate) # and svstate
630 sync += cur_state.msr.eq(msr) # and msr
631
632 m.next = "INSN_READ" # move to "wait for bus" phase
633
634 # dummy pause to find out why simulation is not keeping up
635 with m.State("INSN_READ"):
636 if self.allow_overlap:
637 stopping = dbg.stopping_o
638 else:
639 stopping = Const(0)
640 with m.If(stopping):
641 # stopping: jump back to idle
642 m.next = "IDLE"
643 with m.Else():
644 with m.If(self.imem.f_busy_o & ~fetch_failed): # zzz...
645 # busy but not fetch failed: stay in wait-read
646 comb += self.imem.a_i_valid.eq(1)
647 comb += self.imem.f_i_valid.eq(1)
648 with m.Else():
649 # not busy (or fetch failed!): instruction fetched
650 # when fetch failed, the instruction gets ignored
651 # by the decoder
652 insn = get_insn(self.imem.f_instr_o, cur_state.pc)
653 if self.svp64_en:
654 svp64 = self.svp64
655 # decode the SVP64 prefix, if any
656 comb += svp64.raw_opcode_in.eq(insn)
657 comb += svp64.bigendian.eq(self.core_bigendian_i)
658 # pass the decoded prefix (if any) to PowerDecoder2
659 sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
660 sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
661 # remember whether this is a prefixed instruction,
662 # so the FSM can readily loop when VL==0
663 sync += is_svp64_mode.eq(svp64.is_svp64_mode)
664 # calculate the address of the following instruction
665 insn_size = Mux(svp64.is_svp64_mode, 8, 4)
666 sync += nia.eq(cur_state.pc + insn_size)
667 with m.If(~svp64.is_svp64_mode):
668 # with no prefix, store the instruction
669 # and hand it directly to the next FSM
670 sync += dec_opcode_o.eq(insn)
671 m.next = "INSN_READY"
672 with m.Else():
673 # fetch the rest of the instruction from memory
674 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
675 comb += self.imem.a_i_valid.eq(1)
676 comb += self.imem.f_i_valid.eq(1)
677 m.next = "INSN_READ2"
678 else:
679 # not SVP64 - 32-bit only
680 sync += nia.eq(cur_state.pc + 4)
681 sync += dec_opcode_o.eq(insn)
682 m.next = "INSN_READY"
683
684 with m.State("INSN_READ2"):
685 with m.If(self.imem.f_busy_o): # zzz...
686 # busy: stay in wait-read
687 comb += self.imem.a_i_valid.eq(1)
688 comb += self.imem.f_i_valid.eq(1)
689 with m.Else():
690 # not busy: instruction fetched
691 insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
692 sync += dec_opcode_o.eq(insn)
693 m.next = "INSN_READY"
694 # TODO: probably can start looking at pdecode2.rm_dec
695 # here or maybe even in INSN_READ state, if svp64_mode
696 # detected, in order to trigger - and wait for - the
697 # predicate reading.
698 if self.svp64_en:
699 pmode = pdecode2.rm_dec.predmode
700 """
701 if pmode != SVP64PredMode.ALWAYS.value:
702 fire predicate loading FSM and wait before
703 moving to INSN_READY
704 else:
705 sync += self.srcmask.eq(-1) # set to all 1s
706 sync += self.dstmask.eq(-1) # set to all 1s
707 m.next = "INSN_READY"
708 """
709
710 with m.State("INSN_READY"):
711 # hand over the instruction, to be decoded
712 comb += fetch_insn_o_valid.eq(1)
713 with m.If(fetch_insn_i_ready):
714 m.next = "IDLE"
715
716 # whatever was done above, over-ride it if core reset is held
717 with m.If(self.core_rst):
718 sync += nia.eq(0)
719
720 return m
721
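
# Illustrative software outline (not used by the hardware) of the FetchFSM
# above: fetch 32 bits, check for an SVP64 prefix, and if present fetch the
# 32-bit suffix as well. read32() and is_svp64_prefix() are hypothetical
# stand-ins for the instruction memory and the SVP64PrefixDecoder:
def _fetch_flow_sketch(read32, is_svp64_prefix, pc):
    """returns (opcode, nia, is_svp64_mode) for the instruction at pc"""
    insn = read32(pc)
    if is_svp64_prefix(insn):
        return read32(pc + 4), pc + 8, True    # suffix; next PC is pc+8
    return insn, pc + 4, False                 # plain 32-bit instruction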
722
723 class TestIssuerInternal(TestIssuerBase, Elaboratable):
724 """TestIssuer - reads instructions from TestMemory and issues them
725
726 efficiency and speed are not the main goals here: functional correctness
727 and code clarity are. optimisations (which almost 100% interfere with
728 easy understanding) come later.
729 """
730
731 def fetch_predicate_fsm(self, m,
732 pred_insn_i_valid, pred_insn_o_ready,
733 pred_mask_o_valid, pred_mask_i_ready):
734 """fetch_predicate_fsm - obtains (constructs in the case of CR)
735 src/dest predicate masks
736
737 https://bugs.libre-soc.org/show_bug.cgi?id=617
738 the predicates can be read here, by using IntRegs r_ports['pred']
739 or CRRegs r_ports['pred']. in the case of CRs it will have to
740 be done through multiple reads, extracting one relevant at a time.
741 later, a faster way would be to use the 32-bit-wide CR port but
742 this is more complex decoding, here. equivalent code used in
743 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
744
745 note: this ENTIRE FSM is not to be called when svp64 is disabled
746 """
747 comb = m.d.comb
748 sync = m.d.sync
749 pdecode2 = self.pdecode2
750 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
751 predmode = rm_dec.predmode
752 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
753 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
754 # get src/dst step, so we can skip already used mask bits
755 cur_state = self.cur_state
756 srcstep = cur_state.svstate.srcstep
757 dststep = cur_state.svstate.dststep
758 cur_vl = cur_state.svstate.vl
759
760 # decode predicates
761 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
762 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
763 sidx, scrinvert = get_predcr(m, srcpred, 's')
764 didx, dcrinvert = get_predcr(m, dstpred, 'd')
765
766 # store fetched masks, for either intpred or crpred
767 # when src/dst step is not zero, the skipped mask bits need to be
768 # shifted-out, before actually storing them in src/dest mask
769 new_srcmask = Signal(64, reset_less=True)
770 new_dstmask = Signal(64, reset_less=True)
771
772 with m.FSM(name="fetch_predicate"):
773
774 with m.State("FETCH_PRED_IDLE"):
775 comb += pred_insn_o_ready.eq(1)
776 with m.If(pred_insn_i_valid):
777 with m.If(predmode == SVP64PredMode.INT):
778 # skip fetching destination mask register, when zero
779 with m.If(dall1s):
780 sync += new_dstmask.eq(-1)
781 # directly go to fetch source mask register
782 # guaranteed not to be zero (otherwise predmode
783 # would be SVP64PredMode.ALWAYS, not INT)
784 comb += int_pred.addr.eq(sregread)
785 comb += int_pred.ren.eq(1)
786 m.next = "INT_SRC_READ"
787 # fetch destination predicate register
788 with m.Else():
789 comb += int_pred.addr.eq(dregread)
790 comb += int_pred.ren.eq(1)
791 m.next = "INT_DST_READ"
792 with m.Elif(predmode == SVP64PredMode.CR):
793 # go fetch masks from the CR register file
794 sync += new_srcmask.eq(0)
795 sync += new_dstmask.eq(0)
796 m.next = "CR_READ"
797 with m.Else():
798 sync += self.srcmask.eq(-1)
799 sync += self.dstmask.eq(-1)
800 m.next = "FETCH_PRED_DONE"
801
802 with m.State("INT_DST_READ"):
803 # store destination mask
804 inv = Repl(dinvert, 64)
805 with m.If(dunary):
806 # set selected mask bit for 1<<r3 mode
807 dst_shift = Signal(range(64))
808 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
809 sync += new_dstmask.eq(1 << dst_shift)
810 with m.Else():
811 # invert mask if requested
812 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
813 # skip fetching source mask register, when zero
814 with m.If(sall1s):
815 sync += new_srcmask.eq(-1)
816 m.next = "FETCH_PRED_SHIFT_MASK"
817 # fetch source predicate register
818 with m.Else():
819 comb += int_pred.addr.eq(sregread)
820 comb += int_pred.ren.eq(1)
821 m.next = "INT_SRC_READ"
822
823 with m.State("INT_SRC_READ"):
824 # store source mask
825 inv = Repl(sinvert, 64)
826 with m.If(sunary):
827 # set selected mask bit for 1<<r3 mode
828 src_shift = Signal(range(64))
829 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
830 sync += new_srcmask.eq(1 << src_shift)
831 with m.Else():
832 # invert mask if requested
833 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
834 m.next = "FETCH_PRED_SHIFT_MASK"
835
836 # fetch masks from the CR register file
837 # implements the following loop:
838 # idx, inv = get_predcr(mask)
839 # mask = 0
840 # for cr_idx in range(vl):
841 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
842 # if cr[idx] ^ inv:
843 # mask |= 1 << cr_idx
844 # return mask
845 with m.State("CR_READ"):
846 # CR index to be read, which will be ready by the next cycle
847 cr_idx = Signal.like(cur_vl, reset_less=True)
848 # submit the read operation to the regfile
849 with m.If(cr_idx != cur_vl):
850 # the CR read port is unary ...
851 # ren = 1 << cr_idx
852 # ... in MSB0 convention ...
853 # ren = 1 << (7 - cr_idx)
854 # ... and with an offset:
855 # ren = 1 << (7 - off - cr_idx)
856 idx = SVP64CROffs.CRPred + cr_idx
857 comb += cr_pred.ren.eq(1 << (7 - idx))
858 # signal data valid in the next cycle
859 cr_read = Signal(reset_less=True)
860 sync += cr_read.eq(1)
861 # load the next index
862 sync += cr_idx.eq(cr_idx + 1)
863 with m.Else():
864 # exit on loop end
865 sync += cr_read.eq(0)
866 sync += cr_idx.eq(0)
867 m.next = "FETCH_PRED_SHIFT_MASK"
868 with m.If(cr_read):
869 # compensate for the one cycle delay on the regfile
870 cur_cr_idx = Signal.like(cur_vl)
871 comb += cur_cr_idx.eq(cr_idx - 1)
872 # read the CR field, select the appropriate bit
873 cr_field = Signal(4)
874 scr_bit = Signal()
875 dcr_bit = Signal()
876 comb += cr_field.eq(cr_pred.o_data)
877 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
878 ^ scrinvert)
879 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
880 ^ dcrinvert)
881 # set the corresponding mask bit
882 bit_to_set = Signal.like(self.srcmask)
883 comb += bit_to_set.eq(1 << cur_cr_idx)
884 with m.If(scr_bit):
885 sync += new_srcmask.eq(new_srcmask | bit_to_set)
886 with m.If(dcr_bit):
887 sync += new_dstmask.eq(new_dstmask | bit_to_set)
888
889 with m.State("FETCH_PRED_SHIFT_MASK"):
890 # shift-out skipped mask bits
891 sync += self.srcmask.eq(new_srcmask >> srcstep)
892 sync += self.dstmask.eq(new_dstmask >> dststep)
893 m.next = "FETCH_PRED_DONE"
894
895 with m.State("FETCH_PRED_DONE"):
896 comb += pred_mask_o_valid.eq(1)
897 with m.If(pred_mask_i_ready):
898 m.next = "FETCH_PRED_IDLE"
899
900 def issue_fsm(self, m, core, msr_changed, pc_changed, sv_changed, nia,
901 dbg, core_rst, is_svp64_mode,
902 fetch_pc_o_ready, fetch_pc_i_valid,
903 fetch_insn_o_valid, fetch_insn_i_ready,
904 pred_insn_i_valid, pred_insn_o_ready,
905 pred_mask_o_valid, pred_mask_i_ready,
906 exec_insn_i_valid, exec_insn_o_ready,
907 exec_pc_o_valid, exec_pc_i_ready):
908 """issue FSM
909
910 decode / issue FSM. this interacts with the "fetch" FSM
911 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
912 (outgoing). also interacts with the "execute" FSM
913 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
914 (incoming).
915 SVP64 RM prefixes have already been set up by the
916 "fetch" phase, so execute is fairly straightforward.
917 """
918
919 comb = m.d.comb
920 sync = m.d.sync
921 pdecode2 = self.pdecode2
922 cur_state = self.cur_state
923
924 # temporaries
925 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
926
927 # for updating svstate (things like srcstep etc.)
928 update_svstate = Signal() # set this (below) if updating
929 new_svstate = SVSTATERec("new_svstate")
930 comb += new_svstate.eq(cur_state.svstate)
931
932 # precalculate srcstep+1 and dststep+1
933 cur_srcstep = cur_state.svstate.srcstep
934 cur_dststep = cur_state.svstate.dststep
935 next_srcstep = Signal.like(cur_srcstep)
936 next_dststep = Signal.like(cur_dststep)
937 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
938 comb += next_dststep.eq(cur_state.svstate.dststep+1)
939
940 # note if an exception happened. in a pipelined or OoO design
941 # this needs to be accompanied by "shadowing" (or stalling)
942 exc_happened = self.core.o.exc_happened
943 # also note instruction fetch failed
944 if hasattr(core, "icache"):
945 fetch_failed = core.icache.i_out.fetch_failed
946 flush_needed = True
947 # set to fault in decoder
948 # update (highest priority) instruction fault
949 rising_fetch_failed = rising_edge(m, fetch_failed)
950 with m.If(rising_fetch_failed):
951 sync += pdecode2.instr_fault.eq(1)
952 else:
953 fetch_failed = Const(0, 1)
954 flush_needed = False
955
956 with m.FSM(name="issue_fsm"):
957
958 # sync with the "fetch" phase which is reading the instruction
959 # at this point, there is no instruction running, that
960 # could inadvertently update the PC.
961 with m.State("ISSUE_START"):
962 # reset instruction fault
963 sync += pdecode2.instr_fault.eq(0)
964 # wait on "core stop" release, before next fetch
965 # need to do this here, in case we are in a VL==0 loop
966 with m.If(~dbg.core_stop_o & ~core_rst):
967 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
968 with m.If(fetch_pc_o_ready): # fetch acknowledged us
969 m.next = "INSN_WAIT"
970 with m.Else():
971 # tell core it's stopped, and acknowledge debug handshake
972 comb += dbg.core_stopped_i.eq(1)
973 # while stopped, allow updating the MSR, PC and SVSTATE
974 with m.If(self.pc_i.ok):
975 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
976 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
977 sync += pc_changed.eq(1)
978 with m.If(self.msr_i.ok):
979 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
980 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
981 sync += msr_changed.eq(1)
982 with m.If(self.svstate_i.ok):
983 comb += new_svstate.eq(self.svstate_i.data)
984 comb += update_svstate.eq(1)
985 sync += sv_changed.eq(1)
986
987 # wait for an instruction to arrive from Fetch
988 with m.State("INSN_WAIT"):
989 if self.allow_overlap:
990 stopping = dbg.stopping_o
991 else:
992 stopping = Const(0)
993 with m.If(stopping):
994 # stopping: jump back to idle
995 m.next = "ISSUE_START"
996 if flush_needed:
997 # request the icache to stop asserting "failed"
998 comb += core.icache.flush_in.eq(1)
999 # stop instruction fault
1000 sync += pdecode2.instr_fault.eq(0)
1001 with m.Else():
1002 comb += fetch_insn_i_ready.eq(1)
1003 with m.If(fetch_insn_o_valid):
1004 # loop into ISSUE_START if it's a SVP64 instruction
1005 # and VL == 0. this because VL==0 is a for-loop
1006 # from 0 to 0 i.e. always, always a NOP.
1007 cur_vl = cur_state.svstate.vl
1008 with m.If(is_svp64_mode & (cur_vl == 0)):
1009 # update the PC before fetching the next instruction
1010 # since we are in a VL==0 loop, no instruction was
1011 # executed that we could be overwriting
1012 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1013 comb += self.state_w_pc.i_data.eq(nia)
1014 comb += self.insn_done.eq(1)
1015 m.next = "ISSUE_START"
1016 with m.Else():
1017 if self.svp64_en:
1018 m.next = "PRED_START" # fetching predicate
1019 else:
1020 m.next = "DECODE_SV" # skip predication
1021
1022 with m.State("PRED_START"):
1023 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
1024 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
1025 m.next = "MASK_WAIT"
1026
1027 with m.State("MASK_WAIT"):
1028 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
1029 with m.If(pred_mask_o_valid): # predication masks are ready
1030 m.next = "PRED_SKIP"
1031
1032 # skip zeros in predicate
1033 with m.State("PRED_SKIP"):
1034 with m.If(~is_svp64_mode):
1035 m.next = "DECODE_SV" # nothing to do
1036 with m.Else():
1037 if self.svp64_en:
1038 pred_src_zero = pdecode2.rm_dec.pred_sz
1039 pred_dst_zero = pdecode2.rm_dec.pred_dz
1040
1041 # new srcstep, after skipping zeros
1042 skip_srcstep = Signal.like(cur_srcstep)
1043 # value to be added to the current srcstep
1044 src_delta = Signal.like(cur_srcstep)
1045 # add leading zeros to srcstep, if not in zero mode
1046 with m.If(~pred_src_zero):
1047 # priority encoder (count leading zeros)
1048 # append guard bit, in case the mask is all zeros
1049 pri_enc_src = PriorityEncoder(65)
1050 m.submodules.pri_enc_src = pri_enc_src
1051 comb += pri_enc_src.i.eq(Cat(self.srcmask,
1052 Const(1, 1)))
1053 comb += src_delta.eq(pri_enc_src.o)
1054 # apply delta to srcstep
1055 comb += skip_srcstep.eq(cur_srcstep + src_delta)
1056 # shift-out all leading zeros from the mask
1057 # plus the leading "one" bit
1058 # TODO count leading zeros and shift-out the zero
1059 # bits, in the same step, in hardware
1060 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1061
1062 # same as above, but for dststep
1063 skip_dststep = Signal.like(cur_dststep)
1064 dst_delta = Signal.like(cur_dststep)
1065 with m.If(~pred_dst_zero):
1066 pri_enc_dst = PriorityEncoder(65)
1067 m.submodules.pri_enc_dst = pri_enc_dst
1068 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1069 Const(1, 1)))
1070 comb += dst_delta.eq(pri_enc_dst.o)
1071 comb += skip_dststep.eq(cur_dststep + dst_delta)
1072 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1073
1074 # TODO: initialize mask[VL]=1 to avoid passing past VL
1075 with m.If((skip_srcstep >= cur_vl) |
1076 (skip_dststep >= cur_vl)):
1077 # end of VL loop. Update PC and reset src/dst step
1078 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1079 comb += self.state_w_pc.i_data.eq(nia)
1080 comb += new_svstate.srcstep.eq(0)
1081 comb += new_svstate.dststep.eq(0)
1082 comb += update_svstate.eq(1)
1083 # synchronize with the simulator
1084 comb += self.insn_done.eq(1)
1085 # go back to Issue
1086 m.next = "ISSUE_START"
1087 with m.Else():
1088 # update new src/dst step
1089 comb += new_svstate.srcstep.eq(skip_srcstep)
1090 comb += new_svstate.dststep.eq(skip_dststep)
1091 comb += update_svstate.eq(1)
1092 # proceed to Decode
1093 m.next = "DECODE_SV"
1094
1095 # pass predicate mask bits through to satellite decoders
1096 # TODO: for SIMD this will be *multiple* bits
1097 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1098 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1099
1100 # after src/dst step have been updated, we are ready
1101 # to decode the instruction
1102 with m.State("DECODE_SV"):
1103 # decode the instruction
1104 with m.If(~fetch_failed):
1105 sync += pdecode2.instr_fault.eq(0)
1106 sync += core.i.e.eq(pdecode2.e)
1107 sync += core.i.state.eq(cur_state)
1108 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1109 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1110 if self.svp64_en:
1111 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1112 # set RA_OR_ZERO detection in satellite decoders
1113 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1114 # and svp64 detection
1115 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1116 # and svp64 bit-rev'd ldst mode
1117 ldst_dec = pdecode2.use_svp64_ldst_dec
1118 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1119 # after decoding, reset any previous exception condition,
1120 # allowing it to be set again during the next execution
1121 sync += pdecode2.ldst_exc.eq(0)
1122
1123 m.next = "INSN_EXECUTE" # move to "execute"
1124
1125 # handshake with execution FSM, move to "wait" once acknowledged
1126 with m.State("INSN_EXECUTE"):
1127 comb += exec_insn_i_valid.eq(1) # trigger execute
1128 with m.If(exec_insn_o_ready): # execute acknowledged us
1129 m.next = "EXECUTE_WAIT"
1130
1131 with m.State("EXECUTE_WAIT"):
1132 # wait on "core stop" release, at instruction end
1133 # need to do this here, in case we are in a VL>1 loop
1134 with m.If(~dbg.core_stop_o & ~core_rst):
1135 comb += exec_pc_i_ready.eq(1)
1136 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1137 # the exception info needs to be blatted into
1138 # pdecode.ldst_exc, and the instruction "re-run".
1139 # when ldst_exc.happened is set, the PowerDecoder2
1140 # reacts very differently: it re-writes the instruction
1141 # with a "trap" (calls PowerDecoder2.trap()) which
1142 # will *overwrite* whatever was requested and jump the
1143 # PC to the exception address, as well as alter MSR.
1144 # nothing else needs to be done other than to note
1145 # the change of PC and MSR (and, later, SVSTATE)
1146 with m.If(exc_happened):
1147 mmu = core.fus.get_exc("mmu0")
1148 ldst = core.fus.get_exc("ldst0")
1149 if mmu is not None:
1150 with m.If(fetch_failed):
1151 # instruction fetch: exception is from MMU
1152 # reset instr_fault (highest priority)
1153 sync += pdecode2.ldst_exc.eq(mmu)
1154 sync += pdecode2.instr_fault.eq(0)
1155 if flush_needed:
1156 # request icache to stop asserting "failed"
1157 comb += core.icache.flush_in.eq(1)
1158 with m.If(~fetch_failed):
1159 # otherwise assume it was a LDST exception
1160 sync += pdecode2.ldst_exc.eq(ldst)
1161
1162 with m.If(exec_pc_o_valid):
1163
1164 # was this the last loop iteration?
1165 is_last = Signal()
1166 cur_vl = cur_state.svstate.vl
1167 comb += is_last.eq(next_srcstep == cur_vl)
1168
1169 # return directly to Decode if Execute generated an
1170 # exception.
1171 with m.If(pdecode2.ldst_exc.happened):
1172 m.next = "DECODE_SV"
1173
1174 # if MSR, PC or SVSTATE were changed by the previous
1175 # instruction, go directly back to Fetch, without
1176 # updating either MSR PC or SVSTATE
1177 with m.Elif(msr_changed | pc_changed | sv_changed):
1178 m.next = "ISSUE_START"
1179
1180 # also return to Fetch, when no output was a vector
1181 # (regardless of SRCSTEP and VL), or when the last
1182 # instruction was really the last one of the VL loop
1183 with m.Elif((~pdecode2.loop_continue) | is_last):
1184 # before going back to fetch, update the PC state
1185 # register with the NIA.
1186 # ok here we are not reading the branch unit.
1187 # TODO: this just blithely overwrites whatever
1188 # pipeline updated the PC
1189 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1190 comb += self.state_w_pc.i_data.eq(nia)
1191 # reset SRCSTEP before returning to Fetch
1192 if self.svp64_en:
1193 with m.If(pdecode2.loop_continue):
1194 comb += new_svstate.srcstep.eq(0)
1195 comb += new_svstate.dststep.eq(0)
1196 comb += update_svstate.eq(1)
1197 else:
1198 comb += new_svstate.srcstep.eq(0)
1199 comb += new_svstate.dststep.eq(0)
1200 comb += update_svstate.eq(1)
1201 m.next = "ISSUE_START"
1202
1203 # returning to Execute? then, first update SRCSTEP
1204 with m.Else():
1205 comb += new_svstate.srcstep.eq(next_srcstep)
1206 comb += new_svstate.dststep.eq(next_dststep)
1207 comb += update_svstate.eq(1)
1208 # return to mask skip loop
1209 m.next = "PRED_SKIP"
1210
1211 with m.Else():
1212 comb += dbg.core_stopped_i.eq(1)
1213 if flush_needed:
1214 # request the icache to stop asserting "failed"
1215 comb += core.icache.flush_in.eq(1)
1216 # stop instruction fault
1217 sync += pdecode2.instr_fault.eq(0)
1223 # while stopped, allow updating the MSR, PC and SVSTATE
1224 with m.If(self.msr_i.ok):
1225 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1226 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1227 sync += msr_changed.eq(1)
1228 with m.If(self.pc_i.ok):
1229 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1230 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1231 sync += pc_changed.eq(1)
1232 with m.If(self.svstate_i.ok):
1233 comb += new_svstate.eq(self.svstate_i.data)
1234 comb += update_svstate.eq(1)
1235 sync += sv_changed.eq(1)
1236
1237 # check if svstate needs updating: if so, write it to State Regfile
1238 with m.If(update_svstate):
1239 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
1240 comb += self.state_w_sv.i_data.eq(new_svstate)
1241 sync += cur_state.svstate.eq(new_svstate) # for next clock
1242
1243 def execute_fsm(self, m, core, msr_changed, pc_changed, sv_changed,
1244 exec_insn_i_valid, exec_insn_o_ready,
1245 exec_pc_o_valid, exec_pc_i_ready):
1246 """execute FSM
1247
1248 execute FSM. this interacts with the "issue" FSM
1249 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1250 (outgoing). SVP64 RM prefixes have already been set up by the
1251 "issue" phase, so execute is fairly straightforward.
1252 """
1253
1254 comb = m.d.comb
1255 sync = m.d.sync
1256 pdecode2 = self.pdecode2
1257
1258 # temporaries
1259 core_busy_o = core.n.o_data.busy_o # core is busy
1260 core_ivalid_i = core.p.i_valid # instruction is valid
1261
1262 if hasattr(core, "icache"):
1263 fetch_failed = core.icache.i_out.fetch_failed
1264 else:
1265 fetch_failed = Const(0, 1)
1266
1267 with m.FSM(name="exec_fsm"):
1268
1269 # waiting for instruction bus (stays there until not busy)
1270 with m.State("INSN_START"):
1271 comb += exec_insn_o_ready.eq(1)
1272 with m.If(exec_insn_i_valid):
1273 comb += core_ivalid_i.eq(1) # instruction is valid/issued
1274 sync += sv_changed.eq(0)
1275 sync += pc_changed.eq(0)
1276 sync += msr_changed.eq(0)
1277 with m.If(core.p.o_ready): # only move if accepted
1278 m.next = "INSN_ACTIVE" # move to "wait completion"
1279
1280 # instruction started: must wait till it finishes
1281 with m.State("INSN_ACTIVE"):
1282 # note changes to MSR, PC and SVSTATE
1283 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1284 sync += sv_changed.eq(1)
1285 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1286 sync += msr_changed.eq(1)
1287 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1288 sync += pc_changed.eq(1)
1289 with m.If(~core_busy_o): # instruction done!
1290 comb += exec_pc_o_valid.eq(1)
1291 with m.If(exec_pc_i_ready):
1292 # when finished, indicate "done".
1293 # however, if there was an exception, the instruction
1294 # is *not* yet done. this is an implementation
1295 # detail: we choose to implement exceptions by
1296 # taking the exception information from the LDST
1297 # unit, putting that *back* into the PowerDecoder2,
1298 # and *re-running the entire instruction*.
1299 # if we erroneously indicate "done" here, it is as if
1300 # there were *TWO* instructions:
1301 # 1) the failed LDST 2) a TRAP.
1302 with m.If(~pdecode2.ldst_exc.happened &
1303 ~fetch_failed):
1304 comb += self.insn_done.eq(1)
1305 m.next = "INSN_START" # back to fetch
1306
1307 def elaborate(self, platform):
1308 m = Module()
1309 # convenience
1310 comb, sync = m.d.comb, m.d.sync
1311 cur_state = self.cur_state
1312 pdecode2 = self.pdecode2
1313 dbg = self.dbg
1314 core = self.core
1315
1316 # set up peripherals and core
1317 core_rst = self.core_rst
1318 self.setup_peripherals(m)
1319
1320 # reset current state if core reset requested
1321 with m.If(core_rst):
1322 m.d.sync += self.cur_state.eq(0)
1323
1324 # PC and instruction from I-Memory
1325 comb += self.pc_o.eq(cur_state.pc)
1326 pc_changed = Signal() # note write to PC
1327 msr_changed = Signal() # note write to MSR
1328 sv_changed = Signal() # note write to SVSTATE
1329
1330 # indicate to outside world if any FU is still executing
1331 comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1332
1333 # read state either from incoming override or from regfile
1334 state = CoreState("get") # current state (MSR/PC/SVSTATE)
1335 state_get(m, state.msr, core_rst, self.msr_i,
1336 "msr", # read MSR
1337 self.state_r_msr, StateRegs.MSR)
1338 state_get(m, state.pc, core_rst, self.pc_i,
1339 "pc", # read PC
1340 self.state_r_pc, StateRegs.PC)
1341 state_get(m, state.svstate, core_rst, self.svstate_i,
1342 "svstate", # read SVSTATE
1343 self.state_r_sv, StateRegs.SVSTATE)
1344
1345 # don't write pc every cycle
1346 comb += self.state_w_pc.wen.eq(0)
1347 comb += self.state_w_pc.i_data.eq(0)
1348
1349 # address of the next instruction, in the absence of a branch
1350 # depends on the instruction size
1351 nia = Signal(64)
1352
1353 # connect up debug signals
1354 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1355 comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1356 comb += dbg.state.eq(state)
1357
1358 # pass the prefix mode from Fetch to Issue, so the latter can loop
1359 # on VL==0
1360 is_svp64_mode = Signal()
1361
1362 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1363 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1364 # these are the handshake signals between each
1365
1366 # fetch FSM can run as soon as the PC is valid
1367 fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1368 fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1369
1370 # fetch FSM hands over the instruction to be decoded / issued
1371 fetch_insn_o_valid = Signal()
1372 fetch_insn_i_ready = Signal()
1373
1374 # predicate fetch FSM decodes and fetches the predicate
1375 pred_insn_i_valid = Signal()
1376 pred_insn_o_ready = Signal()
1377
1378 # predicate fetch FSM delivers the masks
1379 pred_mask_o_valid = Signal()
1380 pred_mask_i_ready = Signal()
1381
1382 # issue FSM delivers the instruction to be executed
1383 exec_insn_i_valid = Signal()
1384 exec_insn_o_ready = Signal()
1385
1386 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1387 exec_pc_o_valid = Signal()
1388 exec_pc_i_ready = Signal()
1389
1390 # the FSMs here are perhaps unusual in that they detect conditions
1391 # then "hold" information, combinatorially, for the core
1392 # (as opposed to using sync - which would be on a clock's delay)
1393 # this includes the actual opcode, valid flags and so on.
1394
1395 # Fetch, then predicate fetch, then Issue, then Execute.
1396 # Issue is where the VL for-loop lives. the ready/valid
1397 # signalling is used to communicate between the four.
1398
1399 # set up Fetch FSM
1400 fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1401 self.imem, core_rst, pdecode2, cur_state,
1402 dbg, core, state.svstate, nia, is_svp64_mode)
1403 m.submodules.fetch = fetch
1404 # connect up in/out data to existing Signals
1405 comb += fetch.p.i_data.pc.eq(state.pc)
1406 comb += fetch.p.i_data.msr.eq(state.msr)
1407 # and the ready/valid signalling
1408 comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1409 comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1410 comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1411 comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1412
1413 self.issue_fsm(m, core, msr_changed, pc_changed, sv_changed, nia,
1414 dbg, core_rst, is_svp64_mode,
1415 fetch_pc_o_ready, fetch_pc_i_valid,
1416 fetch_insn_o_valid, fetch_insn_i_ready,
1417 pred_insn_i_valid, pred_insn_o_ready,
1418 pred_mask_o_valid, pred_mask_i_ready,
1419 exec_insn_i_valid, exec_insn_o_ready,
1420 exec_pc_o_valid, exec_pc_i_ready)
1421
1422 if self.svp64_en:
1423 self.fetch_predicate_fsm(m,
1424 pred_insn_i_valid, pred_insn_o_ready,
1425 pred_mask_o_valid, pred_mask_i_ready)
1426
1427 self.execute_fsm(m, core, msr_changed, pc_changed, sv_changed,
1428 exec_insn_i_valid, exec_insn_o_ready,
1429 exec_pc_o_valid, exec_pc_i_ready)
1430
1431 # this bit doesn't have to be in the FSM: connect up to read
1432 # regfiles on demand from DMI
1433 self.do_dmi(m, dbg)
1434
1435 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
1436 # (which uses that in PowerDecoder2 to raise 0x900 exception)
1437 self.tb_dec_fsm(m, cur_state.dec)
1438
1439 return m
1440
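
# Illustrative software model (not used by the hardware) of the predicate
# zero-skipping done in issue_fsm's PRED_SKIP state above: count trailing
# zeros of the remaining mask, advance the step by that amount, and shift
# the consumed bits (including the leading one) out of the mask:
def _pred_skip_sketch(mask, step):
    delta = 0
    while delta < 64 and not ((mask >> delta) & 1):
        delta += 1
    return step + delta, mask >> (delta + 1)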
1441
1442 class TestIssuer(Elaboratable):
1443 def __init__(self, pspec):
1444 self.ti = TestIssuerInternal(pspec)
1445 self.pll = DummyPLL(instance=True)
1446
1447 # PLL direct clock or not
1448 self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1449 if self.pll_en:
1450 self.pll_test_o = Signal(reset_less=True)
1451 self.pll_vco_o = Signal(reset_less=True)
1452 self.clk_sel_i = Signal(2, reset_less=True)
1453 self.ref_clk = ClockSignal() # can't rename it but that's ok
1454 self.pllclk_clk = ClockSignal("pllclk")
1455
1456 def elaborate(self, platform):
1457 m = Module()
1458 comb = m.d.comb
1459
1460 # TestIssuer nominally runs at main clock, actually it is
1461 # all combinatorial internally except for coresync'd components
1462 m.submodules.ti = ti = self.ti
1463
1464 if self.pll_en:
1465 # ClockSelect runs at PLL output internal clock rate
1466 m.submodules.wrappll = pll = self.pll
1467
1468 # add clock domains from PLL
1469 cd_pll = ClockDomain("pllclk")
1470 m.domains += cd_pll
1471
1472 # PLL clock established. has the side-effect of running clksel
1473 # at the PLL's speed (see DomainRenamer("pllclk") above)
1474 pllclk = self.pllclk_clk
1475 comb += pllclk.eq(pll.clk_pll_o)
1476
1477 # wire up external 24mhz to PLL
1478 #comb += pll.clk_24_i.eq(self.ref_clk)
1479 # output 18 mhz PLL test signal, and analog oscillator out
1480 comb += self.pll_test_o.eq(pll.pll_test_o)
1481 comb += self.pll_vco_o.eq(pll.pll_vco_o)
1482
1483 # input to pll clock selection
1484 comb += pll.clk_sel_i.eq(self.clk_sel_i)
1485
1486 # now wire up ResetSignals. don't mind them being in this domain
1487 pll_rst = ResetSignal("pllclk")
1488 comb += pll_rst.eq(ResetSignal())
1489
1490 # internal clock is set to selector clock-out. has the side-effect of
1491 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1492 # debug clock runs at coresync internal clock
1493 cd_coresync = ClockDomain("coresync")
1494 #m.domains += cd_coresync
1495 if self.ti.dbg_domain != 'sync':
1496 cd_dbgsync = ClockDomain("dbgsync")
1497 #m.domains += cd_dbgsync
1498 intclk = ClockSignal("coresync")
1499 dbgclk = ClockSignal(self.ti.dbg_domain)
1500 # XXX BYPASS PLL XXX
1501 # XXX BYPASS PLL XXX
1502 # XXX BYPASS PLL XXX
1503 if self.pll_en:
1504 comb += intclk.eq(self.ref_clk)
1505 else:
1506 comb += intclk.eq(ClockSignal())
1507 if self.ti.dbg_domain != 'sync':
1508 dbgclk = ClockSignal(self.ti.dbg_domain)
1509 comb += dbgclk.eq(intclk)
1510
1511 return m
1512
1513 def ports(self):
1514 return list(self.ti.ports()) + list(self.pll.ports()) + \
1515 [ClockSignal(), ResetSignal()]
1516
1517 def external_ports(self):
1518 ports = self.ti.external_ports()
1519 ports.append(ClockSignal())
1520 ports.append(ResetSignal())
1521 if self.pll_en:
1522 ports.append(self.clk_sel_i)
1523 ports.append(self.pll.clk_24_i)
1524 ports.append(self.pll_test_o)
1525 ports.append(self.pll_vco_o)
1526 ports.append(self.pllclk_clk)
1527 ports.append(self.ref_clk)
1528 return ports
1529
1530
1531 if __name__ == '__main__':
1532 units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1533 'spr': 1,
1534 'div': 1,
1535 'mul': 1,
1536 'shiftrot': 1
1537 }
1538 pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1539 imem_ifacetype='bare_wb',
1540 addr_wid=48,
1541 mask_wid=8,
1542 reg_wid=64,
1543 units=units)
1544 dut = TestIssuer(pspec)
1545 vl = main(dut, ports=dut.ports(), name="test_issuer")
1546
1547 if len(sys.argv) == 1:
1548 vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1549 with open("test_issuer.il", "w") as f:
1550 f.write(vl)