1 """simple core issuer
2
3 not in any way intended for production use. this runs an FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
56 def get_insn(f_instr_o, pc):
57 if f_instr_o.width == 32:
58 return f_instr_o
59 else:
60 # 64-bit: bit 2 of pc decides which word to select
61 return f_instr_o.word_select(pc[2], 32)
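# example of the selection above: with a 64-bit fetch word holding two
# instructions, pc=0x0 (pc[2]=0) selects bits [0:32] of f_instr_o and
# pc=0x4 (pc[2]=1) selects bits [32:64].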
62
63 # gets state input or reads from state regfile
64
65
66 def state_get(m, core_rst, state_i, name, regfile, regnum):
67 comb = m.d.comb
68 sync = m.d.sync
69 # read the requested state register (PC, MSR or SVSTATE)
70 res = Signal(64, reset_less=True, name=name)
71 res_ok_delay = Signal(name="%s_ok_delay" % name)
72 with m.If(~core_rst):
73 sync += res_ok_delay.eq(~state_i.ok)
74 with m.If(state_i.ok):
75 # incoming override (start from pc_i)
76 comb += res.eq(state_i.data)
77 with m.Else():
78 # otherwise read the StateRegs regfile...
79 comb += regfile.ren.eq(1 << regnum)
80 # ... but on a 1-clock delay
81 with m.If(res_ok_delay):
82 comb += res.eq(regfile.o_data)
83 return res
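# note on the helper above: regfile reads have a one-clock latency, so
# res_ok_delay gates regfile.o_data onto res one cycle after ren is raised;
# an incoming override (state_i.ok) takes priority over the regfile value.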
84
85
86 def get_predint(m, mask, name):
87 """decode SVP64 predicate integer mask field to reg number and invert
88 this is identical to the equivalent function in ISACaller except that
89 it doesn't read the INT directly, it just decodes "what needs to be done"
90 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
91
92 * all1s is set to indicate that no mask is to be applied.
93 * regread indicates the GPR register number to be read
94 * invert is set to indicate that the register value is to be inverted
95 * unary indicates that the register (r3) contents are used as a bit index, i.e. the mask is 1<<(r3): a single bit set
96 """
97 comb = m.d.comb
98 regread = Signal(5, name=name+"regread")
99 invert = Signal(name=name+"invert")
100 unary = Signal(name=name+"unary")
101 all1s = Signal(name=name+"all1s")
102 with m.Switch(mask):
103 with m.Case(SVP64PredInt.ALWAYS.value):
104 comb += all1s.eq(1) # use all-ones mask (no predication)
105 with m.Case(SVP64PredInt.R3_UNARY.value):
106 comb += regread.eq(3)
107 comb += unary.eq(1) # 1<<r3 - shift r3 (single bit)
108 with m.Case(SVP64PredInt.R3.value):
109 comb += regread.eq(3)
110 with m.Case(SVP64PredInt.R3_N.value):
111 comb += regread.eq(3)
112 comb += invert.eq(1)
113 with m.Case(SVP64PredInt.R10.value):
114 comb += regread.eq(10)
115 with m.Case(SVP64PredInt.R10_N.value):
116 comb += regread.eq(10)
117 comb += invert.eq(1)
118 with m.Case(SVP64PredInt.R30.value):
119 comb += regread.eq(30)
120 with m.Case(SVP64PredInt.R30_N.value):
121 comb += regread.eq(30)
122 comb += invert.eq(1)
123 return regread, invert, unary, all1s
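# worked example for get_predint(): mask == SVP64PredInt.R10_N.value gives
# regread=10, invert=1, unary=0, all1s=0, i.e. "use the bitwise-inverted
# contents of r10 as the predicate mask".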
124
125
126 def get_predcr(m, mask, name):
127 """decode SVP64 predicate CR to reg number field and invert status
128 this is identical to _get_predcr in ISACaller
129 """
130 comb = m.d.comb
131 idx = Signal(2, name=name+"idx")
132 invert = Signal(name=name+"crinvert")
133 with m.Switch(mask):
134 with m.Case(SVP64PredCR.LT.value):
135 comb += idx.eq(CR.LT)
136 comb += invert.eq(0)
137 with m.Case(SVP64PredCR.GE.value):
138 comb += idx.eq(CR.LT)
139 comb += invert.eq(1)
140 with m.Case(SVP64PredCR.GT.value):
141 comb += idx.eq(CR.GT)
142 comb += invert.eq(0)
143 with m.Case(SVP64PredCR.LE.value):
144 comb += idx.eq(CR.GT)
145 comb += invert.eq(1)
146 with m.Case(SVP64PredCR.EQ.value):
147 comb += idx.eq(CR.EQ)
148 comb += invert.eq(0)
149 with m.Case(SVP64PredCR.NE.value):
150 comb += idx.eq(CR.EQ)
151 comb += invert.eq(1)
152 with m.Case(SVP64PredCR.SO.value):
153 comb += idx.eq(CR.SO)
154 comb += invert.eq(0)
155 with m.Case(SVP64PredCR.NS.value):
156 comb += idx.eq(CR.SO)
157 comb += invert.eq(1)
158 return idx, invert
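# worked example for get_predcr(): mask == SVP64PredCR.GE.value gives
# idx=CR.LT, invert=1, i.e. the predicate bit is true when the CR field's
# LT bit is clear (GE == not LT).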
159
160
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
167 class FetchFSM(ControlBase):
168 def __init__(self, allow_overlap, svp64_en, imem, core_rst,
169 pdecode2, cur_state,
170 dbg, core, svstate, nia, is_svp64_mode):
171 self.allow_overlap = allow_overlap
172 self.svp64_en = svp64_en
173 self.imem = imem
174 self.core_rst = core_rst
175 self.pdecode2 = pdecode2
176 self.cur_state = cur_state
177 self.dbg = dbg
178 self.core = core
179 self.svstate = svstate
180 self.nia = nia
181 self.is_svp64_mode = is_svp64_mode
182
183 # set up pipeline ControlBase and allocate i/o specs
184 # (unusual: normally done by the Pipeline API)
185 super().__init__(stage=self)
186 self.p.i_data, self.n.o_data = self.new_specs(None)
187 self.i, self.o = self.p.i_data, self.n.o_data
188
189 # next 3 functions are Stage API Compliance
190 def setup(self, m, i):
191 pass
192
193 def ispec(self):
194 return FetchInput()
195
196 def ospec(self):
197 return FetchOutput()
198
199 def elaborate(self, platform):
200 """fetch FSM
201
202 this FSM performs a fetch of raw instruction data, partially decodes
203 it 32 bits at a time to detect SVP64 prefixes, and optionally reads
204 a 2nd 32-bit quantity if one is present.
205 """
206 m = super().elaborate(platform)
207
208 dbg = self.dbg
209 core = self.core
210 pc = self.i.pc
211 msr = self.i.msr
212 svstate = self.svstate
213 nia = self.nia
214 is_svp64_mode = self.is_svp64_mode
215 fetch_pc_o_ready = self.p.o_ready
216 fetch_pc_i_valid = self.p.i_valid
217 fetch_insn_o_valid = self.n.o_valid
218 fetch_insn_i_ready = self.n.i_ready
219
220 comb = m.d.comb
221 sync = m.d.sync
222 pdecode2 = self.pdecode2
223 cur_state = self.cur_state
224 dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
225
226 # also note instruction fetch failed
227 if hasattr(core, "icache"):
228 fetch_failed = core.icache.i_out.fetch_failed
229 flush_needed = True
230 else:
231 fetch_failed = Const(0, 1)
232 flush_needed = False
233
234 with m.FSM(name='fetch_fsm'):
235
236 # waiting (zzz)
237 with m.State("IDLE"):
238 with m.If(~dbg.stopping_o & ~fetch_failed):
239 comb += fetch_pc_o_ready.eq(1)
240 with m.If(fetch_pc_i_valid & ~fetch_failed):
241 # instruction allowed to go: start by reading the PC
242 # capture the PC and also drop it into Insn Memory
243 # we have joined a pair of combinatorial memory
244 # lookups together. this is Generally Bad.
245 comb += self.imem.a_pc_i.eq(pc)
246 comb += self.imem.a_i_valid.eq(1)
247 comb += self.imem.f_i_valid.eq(1)
248 sync += cur_state.pc.eq(pc)
249 sync += cur_state.svstate.eq(svstate) # and svstate
250 sync += cur_state.msr.eq(msr) # and msr
251
252 m.next = "INSN_READ" # move to "wait for bus" phase
253
254 # wait for the instruction to arrive from Test Memory / I-Cache
255 with m.State("INSN_READ"):
256 if self.allow_overlap:
257 stopping = dbg.stopping_o
258 else:
259 stopping = Const(0)
260 with m.If(stopping):
261 # stopping: jump back to idle
262 m.next = "IDLE"
263 with m.Else():
264 with m.If(self.imem.f_busy_o & ~fetch_failed): # zzz...
265 # busy but not fetch failed: stay in wait-read
266 comb += self.imem.a_i_valid.eq(1)
267 comb += self.imem.f_i_valid.eq(1)
268 with m.Else():
269 # not busy (or fetch failed!): instruction fetched
270 # when fetch failed, the instruction gets ignored
271 # by the decoder
272 insn = get_insn(self.imem.f_instr_o, cur_state.pc)
273 if self.svp64_en:
274 svp64 = self.svp64
275 # decode the SVP64 prefix, if any
276 comb += svp64.raw_opcode_in.eq(insn)
277 comb += svp64.bigendian.eq(self.core_bigendian_i)
278 # pass the decoded prefix (if any) to PowerDecoder2
279 sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
280 sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
281 # remember whether this is a prefixed instruction,
282 # so the FSM can readily loop when VL==0
283 sync += is_svp64_mode.eq(svp64.is_svp64_mode)
284 # calculate the address of the following instruction
285 insn_size = Mux(svp64.is_svp64_mode, 8, 4)
286 sync += nia.eq(cur_state.pc + insn_size)
287 with m.If(~svp64.is_svp64_mode):
288 # with no prefix, store the instruction
289 # and hand it directly to the next FSM
290 sync += dec_opcode_o.eq(insn)
291 m.next = "INSN_READY"
292 with m.Else():
293 # fetch the rest of the instruction from memory
294 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
295 comb += self.imem.a_i_valid.eq(1)
296 comb += self.imem.f_i_valid.eq(1)
297 m.next = "INSN_READ2"
298 else:
299 # not SVP64 - 32-bit only
300 sync += nia.eq(cur_state.pc + 4)
301 sync += dec_opcode_o.eq(insn)
302 m.next = "INSN_READY"
303
304 with m.State("INSN_READ2"):
305 with m.If(self.imem.f_busy_o): # zzz...
306 # busy: stay in wait-read
307 comb += self.imem.a_i_valid.eq(1)
308 comb += self.imem.f_i_valid.eq(1)
309 with m.Else():
310 # not busy: instruction fetched
311 insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
312 sync += dec_opcode_o.eq(insn)
313 m.next = "INSN_READY"
314 # TODO: probably can start looking at pdecode2.rm_dec
315 # here or maybe even in INSN_READ state, if svp64_mode
316 # detected, in order to trigger - and wait for - the
317 # predicate reading.
318 if self.svp64_en:
319 pmode = pdecode2.rm_dec.predmode
320 """
321 if pmode != SVP64PredMode.ALWAYS.value:
322 fire predicate loading FSM and wait before
323 moving to INSN_READY
324 else:
325 sync += self.srcmask.eq(-1) # set to all 1s
326 sync += self.dstmask.eq(-1) # set to all 1s
327 m.next = "INSN_READY"
328 """
329
330 with m.State("INSN_READY"):
331 # hand over the instruction, to be decoded
332 comb += fetch_insn_o_valid.eq(1)
333 with m.If(fetch_insn_i_ready):
334 m.next = "IDLE"
335
336 # whatever was done above, over-ride it if core reset is held
337 with m.If(self.core_rst):
338 sync += nia.eq(0)
339
340 return m
341
342
343 class TestIssuerInternal(Elaboratable):
344 """TestIssuer - reads instructions from TestMemory and issues them
345
346 efficiency and speed are not the main goals here: functional correctness
347 and code clarity are. optimisations (which almost 100% interfere with
348 easy understanding) come later.
349 """
350
351 def __init__(self, pspec):
352
353 # test if SVP64 is to be enabled
354 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
355
356 # and if regfiles are reduced
357 self.regreduce_en = (hasattr(pspec, "regreduce") and
358 (pspec.regreduce == True))
359
360 # and if overlap requested
361 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
362 (pspec.allow_overlap == True))
363
364 # JTAG interface. add this right at the start because if it's
365 # added it *modifies* the pspec, by adding enable/disable signals
366 # for parts of the rest of the core
367 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
368 self.dbg_domain = "sync" # sigh "dbgsync" too problematic
369 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
370 if self.jtag_en:
371 # XXX MUST keep this up-to-date with litex, and
372 # soc-cocotb-sim, and err.. all needs sorting out, argh
373 subset = ['uart',
374 'mtwi',
375 'eint', 'gpio', 'mspi0',
376 # 'mspi1', - disabled for now
377 # 'pwm', 'sd0', - disabled for now
378 'sdr']
379 self.jtag = JTAG(get_pinspecs(subset=subset),
380 domain=self.dbg_domain)
381 # add signals to pspec to enable/disable icache and dcache
382 # (or data and instruction wishbone if icache/dcache not included)
383 # https://bugs.libre-soc.org/show_bug.cgi?id=520
384 # TODO: do we actually care if these are not domain-synchronised?
385 # honestly probably not.
386 pspec.wb_icache_en = self.jtag.wb_icache_en
387 pspec.wb_dcache_en = self.jtag.wb_dcache_en
388 self.wb_sram_en = self.jtag.wb_sram_en
389 else:
390 self.wb_sram_en = Const(1)
391
392 # add 4k sram blocks?
393 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
394 pspec.sram4x4kblock == True)
395 if self.sram4x4k:
396 self.sram4k = []
397 for i in range(4):
398 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
399 # features={'err'}
400 ))
401
402 # add interrupt controller?
403 self.xics = hasattr(pspec, "xics") and pspec.xics == True
404 if self.xics:
405 self.xics_icp = XICS_ICP()
406 self.xics_ics = XICS_ICS()
407 self.int_level_i = self.xics_ics.int_level_i
408
409 # add GPIO peripheral?
410 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
411 if self.gpio:
412 self.simple_gpio = SimpleGPIO()
413 self.gpio_o = self.simple_gpio.gpio_o
414
415 # main instruction core. suitable for prototyping / demo only
416 self.core = core = NonProductionCore(pspec)
417 self.core_rst = ResetSignal("coresync")
418
419 # instruction decoder. goes into Trap Record
420 #pdecode = create_pdecode()
421 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
422 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
423 opkls=IssuerDecode2ToOperand,
424 svp64_en=self.svp64_en,
425 regreduce_en=self.regreduce_en)
426 pdecode = self.pdecode2.dec
427
428 if self.svp64_en:
429 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
430
431 # Test Instruction memory
432 if hasattr(core, "icache"):
433 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
434 # truly dreadful. needs a huge reorg.
435 pspec.icache = core.icache
436 self.imem = ConfigFetchUnit(pspec).fu
437
438 # DMI interface
439 self.dbg = CoreDebug()
440
441 # instruction go/monitor
442 self.pc_o = Signal(64, reset_less=True)
443 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
444 self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
445 self.svstate_i = Data(64, "svstate_i") # ditto
446 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
447 self.busy_o = Signal(reset_less=True)
448 self.memerr_o = Signal(reset_less=True)
449
450 # STATE regfile read/write ports for PC, MSR, SVSTATE
451 staterf = self.core.regs.rf['state']
452 self.state_r_msr = staterf.r_ports['msr'] # MSR rd
453 self.state_r_pc = staterf.r_ports['cia'] # PC rd
454 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
455
456 self.state_w_msr = staterf.w_ports['msr'] # MSR wr
457 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
458 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
459
460 # DMI interface access
461 intrf = self.core.regs.rf['int']
462 crrf = self.core.regs.rf['cr']
463 xerrf = self.core.regs.rf['xer']
464 self.int_r = intrf.r_ports['dmi'] # INT read
465 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
466 self.xer_r = xerrf.r_ports['full_xer'] # XER read
467
468 if self.svp64_en:
469 # for predication
470 self.int_pred = intrf.r_ports['pred'] # INT predicate read
471 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
472
473 # hack method of keeping an eye on whether branch/trap set the PC
474 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
475 self.state_nia.wen.name = 'state_nia_wen'
476
477 # pulse to synchronize the simulator at instruction end
478 self.insn_done = Signal()
479
480 # indicate any instruction still outstanding, in execution
481 self.any_busy = Signal()
482
483 if self.svp64_en:
484 # store copies of predicate masks
485 self.srcmask = Signal(64)
486 self.dstmask = Signal(64)
487
488 def fetch_predicate_fsm(self, m,
489 pred_insn_i_valid, pred_insn_o_ready,
490 pred_mask_o_valid, pred_mask_i_ready):
491 """fetch_predicate_fsm - obtains (constructs in the case of CR)
492 src/dest predicate masks
493
494 https://bugs.libre-soc.org/show_bug.cgi?id=617
495 the predicates can be read here, by using IntRegs r_ports['pred']
496 or CRRegs r_ports['pred']. in the case of CRs it will have to
497 be done through multiple reads, extracting one relevant at a time.
498 later, a faster way would be to use the 32-bit-wide CR port but
499 this is more complex decoding, here. equivalent code used in
500 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
501
502 note: this ENTIRE FSM is not to be called when svp64 is disabled
503 """
504 comb = m.d.comb
505 sync = m.d.sync
506 pdecode2 = self.pdecode2
507 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
508 predmode = rm_dec.predmode
509 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
510 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
511 # get src/dst step, so we can skip already used mask bits
512 cur_state = self.cur_state
513 srcstep = cur_state.svstate.srcstep
514 dststep = cur_state.svstate.dststep
515 cur_vl = cur_state.svstate.vl
516
517 # decode predicates
518 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
519 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
520 sidx, scrinvert = get_predcr(m, srcpred, 's')
521 didx, dcrinvert = get_predcr(m, dstpred, 'd')
522
523 # store fetched masks, for either intpred or crpred
524 # when src/dst step is not zero, the skipped mask bits need to be
525 # shifted-out, before actually storing them in src/dest mask
526 new_srcmask = Signal(64, reset_less=True)
527 new_dstmask = Signal(64, reset_less=True)
528
529 with m.FSM(name="fetch_predicate"):
530
531 with m.State("FETCH_PRED_IDLE"):
532 comb += pred_insn_o_ready.eq(1)
533 with m.If(pred_insn_i_valid):
534 with m.If(predmode == SVP64PredMode.INT):
535 # skip fetching destination mask register, when zero
536 with m.If(dall1s):
537 sync += new_dstmask.eq(-1)
538 # directly go to fetch source mask register
539 # guaranteed not to be zero (otherwise predmode
540 # would be SVP64PredMode.ALWAYS, not INT)
541 comb += int_pred.addr.eq(sregread)
542 comb += int_pred.ren.eq(1)
543 m.next = "INT_SRC_READ"
544 # fetch destination predicate register
545 with m.Else():
546 comb += int_pred.addr.eq(dregread)
547 comb += int_pred.ren.eq(1)
548 m.next = "INT_DST_READ"
549 with m.Elif(predmode == SVP64PredMode.CR):
550 # go fetch masks from the CR register file
551 sync += new_srcmask.eq(0)
552 sync += new_dstmask.eq(0)
553 m.next = "CR_READ"
554 with m.Else():
555 sync += self.srcmask.eq(-1)
556 sync += self.dstmask.eq(-1)
557 m.next = "FETCH_PRED_DONE"
558
559 with m.State("INT_DST_READ"):
560 # store destination mask
561 inv = Repl(dinvert, 64)
562 with m.If(dunary):
563 # set selected mask bit for 1<<r3 mode
564 dst_shift = Signal(range(64))
565 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
566 sync += new_dstmask.eq(1 << dst_shift)
567 with m.Else():
568 # invert mask if requested
569 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
570 # skip fetching source mask register, when zero
571 with m.If(sall1s):
572 sync += new_srcmask.eq(-1)
573 m.next = "FETCH_PRED_SHIFT_MASK"
574 # fetch source predicate register
575 with m.Else():
576 comb += int_pred.addr.eq(sregread)
577 comb += int_pred.ren.eq(1)
578 m.next = "INT_SRC_READ"
579
580 with m.State("INT_SRC_READ"):
581 # store source mask
582 inv = Repl(sinvert, 64)
583 with m.If(sunary):
584 # set selected mask bit for 1<<r3 mode
585 src_shift = Signal(range(64))
586 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
587 sync += new_srcmask.eq(1 << src_shift)
588 with m.Else():
589 # invert mask if requested
590 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
591 m.next = "FETCH_PRED_SHIFT_MASK"
592
593 # fetch masks from the CR register file
594 # implements the following loop:
595 # idx, inv = get_predcr(mask)
596 # mask = 0
597 # for cr_idx in range(vl):
598 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
599 # if cr[idx] ^ inv:
600 # mask |= 1 << cr_idx
601 # return mask
602 with m.State("CR_READ"):
603 # CR index to be read, which will be ready by the next cycle
604 cr_idx = Signal.like(cur_vl, reset_less=True)
605 # submit the read operation to the regfile
606 with m.If(cr_idx != cur_vl):
607 # the CR read port is unary ...
608 # ren = 1 << cr_idx
609 # ... in MSB0 convention ...
610 # ren = 1 << (7 - cr_idx)
611 # ... and with an offset:
612 # ren = 1 << (7 - off - cr_idx)
613 idx = SVP64CROffs.CRPred + cr_idx
614 comb += cr_pred.ren.eq(1 << (7 - idx))
615 # signal data valid in the next cycle
616 cr_read = Signal(reset_less=True)
617 sync += cr_read.eq(1)
618 # load the next index
619 sync += cr_idx.eq(cr_idx + 1)
620 with m.Else():
621 # exit on loop end
622 sync += cr_read.eq(0)
623 sync += cr_idx.eq(0)
624 m.next = "FETCH_PRED_SHIFT_MASK"
625 with m.If(cr_read):
626 # compensate for the one cycle delay on the regfile
627 cur_cr_idx = Signal.like(cur_vl)
628 comb += cur_cr_idx.eq(cr_idx - 1)
629 # read the CR field, select the appropriate bit
630 cr_field = Signal(4)
631 scr_bit = Signal()
632 dcr_bit = Signal()
633 comb += cr_field.eq(cr_pred.o_data)
634 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
635 ^ scrinvert)
636 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
637 ^ dcrinvert)
638 # set the corresponding mask bit
639 bit_to_set = Signal.like(self.srcmask)
640 comb += bit_to_set.eq(1 << cur_cr_idx)
641 with m.If(scr_bit):
642 sync += new_srcmask.eq(new_srcmask | bit_to_set)
643 with m.If(dcr_bit):
644 sync += new_dstmask.eq(new_dstmask | bit_to_set)
645
646 with m.State("FETCH_PRED_SHIFT_MASK"):
647 # shift-out skipped mask bits
648 sync += self.srcmask.eq(new_srcmask >> srcstep)
649 sync += self.dstmask.eq(new_dstmask >> dststep)
650 m.next = "FETCH_PRED_DONE"
651
652 with m.State("FETCH_PRED_DONE"):
653 comb += pred_mask_o_valid.eq(1)
654 with m.If(pred_mask_i_ready):
655 m.next = "FETCH_PRED_IDLE"
656
657 def issue_fsm(self, m, core, msr_changed, pc_changed, sv_changed, nia,
658 dbg, core_rst, is_svp64_mode,
659 fetch_pc_o_ready, fetch_pc_i_valid,
660 fetch_insn_o_valid, fetch_insn_i_ready,
661 pred_insn_i_valid, pred_insn_o_ready,
662 pred_mask_o_valid, pred_mask_i_ready,
663 exec_insn_i_valid, exec_insn_o_ready,
664 exec_pc_o_valid, exec_pc_i_ready):
665 """issue FSM
666
667 decode / issue FSM. this interacts with the "fetch" FSM
668 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
669 (outgoing). also interacts with the "execute" FSM
670 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
671 (incoming).
672 SVP64 RM prefixes have already been set up by the
673 "fetch" phase, so execute is fairly straightforward.
674 """
675
676 comb = m.d.comb
677 sync = m.d.sync
678 pdecode2 = self.pdecode2
679 cur_state = self.cur_state
680
681 # temporaries
682 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
683
684 # for updating svstate (things like srcstep etc.)
685 update_svstate = Signal() # set this (below) if updating
686 new_svstate = SVSTATERec("new_svstate")
687 comb += new_svstate.eq(cur_state.svstate)
688
689 # precalculate srcstep+1 and dststep+1
690 cur_srcstep = cur_state.svstate.srcstep
691 cur_dststep = cur_state.svstate.dststep
692 next_srcstep = Signal.like(cur_srcstep)
693 next_dststep = Signal.like(cur_dststep)
694 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
695 comb += next_dststep.eq(cur_state.svstate.dststep+1)
696
697 # note if an exception happened. in a pipelined or OoO design
698 # this needs to be accompanied by "shadowing" (or stalling)
699 exc_happened = self.core.o.exc_happened
700 # also note instruction fetch failed
701 if hasattr(core, "icache"):
702 fetch_failed = core.icache.i_out.fetch_failed
703 flush_needed = True
704 # set to fault in decoder
705 # update (highest priority) instruction fault
706 rising_fetch_failed = rising_edge(m, fetch_failed)
707 with m.If(rising_fetch_failed):
708 sync += pdecode2.instr_fault.eq(1)
709 else:
710 fetch_failed = Const(0, 1)
711 flush_needed = False
712
713 with m.FSM(name="issue_fsm"):
714
715 # sync with the "fetch" phase which is reading the instruction
716 # at this point, there is no instruction running, that
717 # could inadvertently update the PC.
718 with m.State("ISSUE_START"):
719 # reset instruction fault
720 sync += pdecode2.instr_fault.eq(0)
721 # wait on "core stop" release, before next fetch
722 # need to do this here, in case we are in a VL==0 loop
723 with m.If(~dbg.core_stop_o & ~core_rst):
724 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
725 with m.If(fetch_pc_o_ready): # fetch acknowledged us
726 m.next = "INSN_WAIT"
727 with m.Else():
728 # tell core it's stopped, and acknowledge debug handshake
729 comb += dbg.core_stopped_i.eq(1)
730 # while stopped, allow updating the MSR, PC and SVSTATE
731 with m.If(self.pc_i.ok):
732 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
733 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
734 sync += pc_changed.eq(1)
735 with m.If(self.msr_i.ok):
736 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
737 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
738 sync += msr_changed.eq(1)
739 with m.If(self.svstate_i.ok):
740 comb += new_svstate.eq(self.svstate_i.data)
741 comb += update_svstate.eq(1)
742 sync += sv_changed.eq(1)
743
744 # wait for an instruction to arrive from Fetch
745 with m.State("INSN_WAIT"):
746 if self.allow_overlap:
747 stopping = dbg.stopping_o
748 else:
749 stopping = Const(0)
750 with m.If(stopping):
751 # stopping: jump back to idle
752 m.next = "ISSUE_START"
753 if flush_needed:
754 # request the icache to stop asserting "failed"
755 comb += core.icache.flush_in.eq(1)
756 # stop instruction fault
757 sync += pdecode2.instr_fault.eq(0)
758 with m.Else():
759 comb += fetch_insn_i_ready.eq(1)
760 with m.If(fetch_insn_o_valid):
761 # loop into ISSUE_START if it's a SVP64 instruction
762 # and VL == 0. this because VL==0 is a for-loop
763 # from 0 to 0 i.e. always, always a NOP.
764 cur_vl = cur_state.svstate.vl
765 with m.If(is_svp64_mode & (cur_vl == 0)):
766 # update the PC before fetching the next instruction
767 # since we are in a VL==0 loop, no instruction was
768 # executed that we could be overwriting
769 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
770 comb += self.state_w_pc.i_data.eq(nia)
771 comb += self.insn_done.eq(1)
772 m.next = "ISSUE_START"
773 with m.Else():
774 if self.svp64_en:
775 m.next = "PRED_START" # fetching predicate
776 else:
777 m.next = "DECODE_SV" # skip predication
778
779 with m.State("PRED_START"):
780 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
781 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
782 m.next = "MASK_WAIT"
783
784 with m.State("MASK_WAIT"):
785 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
786 with m.If(pred_mask_o_valid): # predication masks are ready
787 m.next = "PRED_SKIP"
788
789 # skip zeros in predicate
790 with m.State("PRED_SKIP"):
791 with m.If(~is_svp64_mode):
792 m.next = "DECODE_SV" # nothing to do
793 with m.Else():
794 if self.svp64_en:
795 pred_src_zero = pdecode2.rm_dec.pred_sz
796 pred_dst_zero = pdecode2.rm_dec.pred_dz
797
798 # new srcstep, after skipping zeros
799 skip_srcstep = Signal.like(cur_srcstep)
800 # value to be added to the current srcstep
801 src_delta = Signal.like(cur_srcstep)
802 # add leading zeros to srcstep, if not in zero mode
803 with m.If(~pred_src_zero):
804 # priority encoder (count leading zeros)
805 # append guard bit, in case the mask is all zeros
806 pri_enc_src = PriorityEncoder(65)
807 m.submodules.pri_enc_src = pri_enc_src
808 comb += pri_enc_src.i.eq(Cat(self.srcmask,
809 Const(1, 1)))
810 comb += src_delta.eq(pri_enc_src.o)
811 # apply delta to srcstep
812 comb += skip_srcstep.eq(cur_srcstep + src_delta)
813 # shift-out all leading zeros from the mask
814 # plus the leading "one" bit
815 # TODO count leading zeros and shift-out the zero
816 # bits, in the same step, in hardware
817 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
818
819 # same as above, but for dststep
820 skip_dststep = Signal.like(cur_dststep)
821 dst_delta = Signal.like(cur_dststep)
822 with m.If(~pred_dst_zero):
823 pri_enc_dst = PriorityEncoder(65)
824 m.submodules.pri_enc_dst = pri_enc_dst
825 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
826 Const(1, 1)))
827 comb += dst_delta.eq(pri_enc_dst.o)
828 comb += skip_dststep.eq(cur_dststep + dst_delta)
829 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
830
831 # TODO: initialize mask[VL]=1 to avoid passing past VL
832 with m.If((skip_srcstep >= cur_vl) |
833 (skip_dststep >= cur_vl)):
834 # end of VL loop. Update PC and reset src/dst step
835 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
836 comb += self.state_w_pc.i_data.eq(nia)
837 comb += new_svstate.srcstep.eq(0)
838 comb += new_svstate.dststep.eq(0)
839 comb += update_svstate.eq(1)
840 # synchronize with the simulator
841 comb += self.insn_done.eq(1)
842 # go back to Issue
843 m.next = "ISSUE_START"
844 with m.Else():
845 # update new src/dst step
846 comb += new_svstate.srcstep.eq(skip_srcstep)
847 comb += new_svstate.dststep.eq(skip_dststep)
848 comb += update_svstate.eq(1)
849 # proceed to Decode
850 m.next = "DECODE_SV"
851
852 # pass predicate mask bits through to satellite decoders
853 # TODO: for SIMD this will be *multiple* bits
854 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
855 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
856
857 # after src/dst step have been updated, we are ready
858 # to decode the instruction
859 with m.State("DECODE_SV"):
860 # decode the instruction
861 with m.If(~fetch_failed):
862 sync += pdecode2.instr_fault.eq(0)
863 sync += core.i.e.eq(pdecode2.e)
864 sync += core.i.state.eq(cur_state)
865 sync += core.i.raw_insn_i.eq(dec_opcode_i)
866 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
867 if self.svp64_en:
868 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
869 # set RA_OR_ZERO detection in satellite decoders
870 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
871 # and svp64 detection
872 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
873 # and svp64 bit-rev'd ldst mode
874 ldst_dec = pdecode2.use_svp64_ldst_dec
875 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
876 # after decoding, reset any previous exception condition,
877 # allowing it to be set again during the next execution
878 sync += pdecode2.ldst_exc.eq(0)
879
880 m.next = "INSN_EXECUTE" # move to "execute"
881
882 # handshake with execution FSM, move to "wait" once acknowledged
883 with m.State("INSN_EXECUTE"):
884 comb += exec_insn_i_valid.eq(1) # trigger execute
885 with m.If(exec_insn_o_ready): # execute acknowledged us
886 m.next = "EXECUTE_WAIT"
887
888 with m.State("EXECUTE_WAIT"):
889 # wait on "core stop" release, at instruction end
890 # need to do this here, in case we are in a VL>1 loop
891 with m.If(~dbg.core_stop_o & ~core_rst):
892 comb += exec_pc_i_ready.eq(1)
893 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
894 # the exception info needs to be blatted into
895 # pdecode.ldst_exc, and the instruction "re-run".
896 # when ldst_exc.happened is set, the PowerDecoder2
897 # reacts very differently: it re-writes the instruction
898 # with a "trap" (calls PowerDecoder2.trap()) which
899 # will *overwrite* whatever was requested and jump the
900 # PC to the exception address, as well as alter MSR.
901 # nothing else needs to be done other than to note
902 # the change of PC and MSR (and, later, SVSTATE)
903 with m.If(exc_happened):
904 mmu = core.fus.get_exc("mmu0")
905 ldst = core.fus.get_exc("ldst0")
906 if mmu is not None:
907 with m.If(fetch_failed):
908 # instruction fetch: exception is from MMU
909 # reset instr_fault (highest priority)
910 sync += pdecode2.ldst_exc.eq(mmu)
911 sync += pdecode2.instr_fault.eq(0)
912 if flush_needed:
913 # request icache to stop asserting "failed"
914 comb += core.icache.flush_in.eq(1)
915 with m.If(~fetch_failed):
916 # otherwise assume it was a LDST exception
917 sync += pdecode2.ldst_exc.eq(ldst)
918
919 with m.If(exec_pc_o_valid):
920
921 # was this the last loop iteration?
922 is_last = Signal()
923 cur_vl = cur_state.svstate.vl
924 comb += is_last.eq(next_srcstep == cur_vl)
925
926 # return directly to Decode if Execute generated an
927 # exception.
928 with m.If(pdecode2.ldst_exc.happened):
929 m.next = "DECODE_SV"
930
931 # if MSR, PC or SVSTATE were changed by the previous
932 # instruction, go directly back to Fetch, without
933 # updating either MSR PC or SVSTATE
934 with m.Elif(msr_changed | pc_changed | sv_changed):
935 m.next = "ISSUE_START"
936
937 # also return to Fetch, when no output was a vector
938 # (regardless of SRCSTEP and VL), or when the last
939 # instruction was really the last one of the VL loop
940 with m.Elif((~pdecode2.loop_continue) | is_last):
941 # before going back to fetch, update the PC state
942 # register with the NIA.
943 # ok here we are not reading the branch unit.
944 # TODO: this just blithely overwrites whatever
945 # pipeline updated the PC
946 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
947 comb += self.state_w_pc.i_data.eq(nia)
948 # reset SRCSTEP before returning to Fetch
949 if self.svp64_en:
950 with m.If(pdecode2.loop_continue):
951 comb += new_svstate.srcstep.eq(0)
952 comb += new_svstate.dststep.eq(0)
953 comb += update_svstate.eq(1)
954 else:
955 comb += new_svstate.srcstep.eq(0)
956 comb += new_svstate.dststep.eq(0)
957 comb += update_svstate.eq(1)
958 m.next = "ISSUE_START"
959
960 # returning to Execute? then, first update SRCSTEP
961 with m.Else():
962 comb += new_svstate.srcstep.eq(next_srcstep)
963 comb += new_svstate.dststep.eq(next_dststep)
964 comb += update_svstate.eq(1)
965 # return to mask skip loop
966 m.next = "PRED_SKIP"
967
968 with m.Else():
969 comb += dbg.core_stopped_i.eq(1)
970 if flush_needed:
971 # request the icache to stop asserting "failed"
972 comb += core.icache.flush_in.eq(1)
973 # stop instruction fault
974 sync += pdecode2.instr_fault.eq(0)
975 if flush_needed:
976 # request the icache to stop asserting "failed"
977 comb += core.icache.flush_in.eq(1)
978 # stop instruction fault
979 sync += pdecode2.instr_fault.eq(0)
980 # while stopped, allow updating the MSR, PC and SVSTATE
981 with m.If(self.msr_i.ok):
982 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
983 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
984 sync += msr_changed.eq(1)
985 with m.If(self.pc_i.ok):
986 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
987 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
988 sync += pc_changed.eq(1)
989 with m.If(self.svstate_i.ok):
990 comb += new_svstate.eq(self.svstate_i.data)
991 comb += update_svstate.eq(1)
992 sync += sv_changed.eq(1)
993
994 # check if svstate needs updating: if so, write it to State Regfile
995 with m.If(update_svstate):
996 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
997 comb += self.state_w_sv.i_data.eq(new_svstate)
998 sync += cur_state.svstate.eq(new_svstate) # for next clock
999
1000 def execute_fsm(self, m, core, msr_changed, pc_changed, sv_changed,
1001 exec_insn_i_valid, exec_insn_o_ready,
1002 exec_pc_o_valid, exec_pc_i_ready):
1003 """execute FSM
1004
1005 execute FSM. this interacts with the "issue" FSM
1006 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1007 (outgoing). SVP64 RM prefixes have already been set up by the
1008 "issue" phase, so execute is fairly straightforward.
1009 """
1010
1011 comb = m.d.comb
1012 sync = m.d.sync
1013 pdecode2 = self.pdecode2
1014
1015 # temporaries
1016 core_busy_o = core.n.o_data.busy_o # core is busy
1017 core_ivalid_i = core.p.i_valid # instruction is valid
1018
1019 if hasattr(core, "icache"):
1020 fetch_failed = core.icache.i_out.fetch_failed
1021 else:
1022 fetch_failed = Const(0, 1)
1023
1024 with m.FSM(name="exec_fsm"):
1025
1026 # waiting for instruction bus (stays there until not busy)
1027 with m.State("INSN_START"):
1028 comb += exec_insn_o_ready.eq(1)
1029 with m.If(exec_insn_i_valid):
1030 comb += core_ivalid_i.eq(1) # instruction is valid/issued
1031 sync += sv_changed.eq(0)
1032 sync += pc_changed.eq(0)
1033 sync += msr_changed.eq(0)
1034 with m.If(core.p.o_ready): # only move if accepted
1035 m.next = "INSN_ACTIVE" # move to "wait completion"
1036
1037 # instruction started: must wait till it finishes
1038 with m.State("INSN_ACTIVE"):
1039 # note changes to MSR, PC and SVSTATE
1040 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1041 sync += sv_changed.eq(1)
1042 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1043 sync += msr_changed.eq(1)
1044 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1045 sync += pc_changed.eq(1)
1046 with m.If(~core_busy_o): # instruction done!
1047 comb += exec_pc_o_valid.eq(1)
1048 with m.If(exec_pc_i_ready):
1049 # when finished, indicate "done".
1050 # however, if there was an exception, the instruction
1051 # is *not* yet done. this is an implementation
1052 # detail: we choose to implement exceptions by
1053 # taking the exception information from the LDST
1054 # unit, putting that *back* into the PowerDecoder2,
1055 # and *re-running the entire instruction*.
1056 # if we erroneously indicate "done" here, it is as if
1057 # there were *TWO* instructions:
1058 # 1) the failed LDST 2) a TRAP.
1059 with m.If(~pdecode2.ldst_exc.happened &
1060 ~fetch_failed):
1061 comb += self.insn_done.eq(1)
1062 m.next = "INSN_START" # back to fetch
1063
1064 def setup_peripherals(self, m):
1065 comb, sync = m.d.comb, m.d.sync
1066
1067 # okaaaay so the debug module must be in coresync clock domain
1068 # but NOT its reset signal. to cope with this, set every single
1069 # submodule explicitly in coresync domain, debug and JTAG
1070 # in their own one but using *external* reset.
1071 csd = DomainRenamer("coresync")
1072 dbd = DomainRenamer(self.dbg_domain)
1073
1074 m.submodules.core = core = csd(self.core)
1075 # this _so_ needs sorting out. ICache is added down inside
1076 # LoadStore1 and is already a submodule of LoadStore1
1077 if not isinstance(self.imem, ICache):
1078 m.submodules.imem = imem = csd(self.imem)
1079 m.submodules.dbg = dbg = dbd(self.dbg)
1080 if self.jtag_en:
1081 m.submodules.jtag = jtag = dbd(self.jtag)
1082 # TODO: UART2GDB mux, here, from external pin
1083 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1084 sync += dbg.dmi.connect_to(jtag.dmi)
1085
1086 cur_state = self.cur_state
1087
1088 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
1089 if self.sram4x4k:
1090 for i, sram in enumerate(self.sram4k):
1091 m.submodules["sram4k_%d" % i] = csd(sram)
1092 comb += sram.enable.eq(self.wb_sram_en)
1093
1094 # XICS interrupt handler
1095 if self.xics:
1096 m.submodules.xics_icp = icp = csd(self.xics_icp)
1097 m.submodules.xics_ics = ics = csd(self.xics_ics)
1098 comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
1099 sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
1100
1101 # GPIO test peripheral
1102 if self.gpio:
1103 m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1104
1105 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1106 # XXX causes litex ECP5 test to get wrong idea about input and output
1107 # (but works with verilator sim *sigh*)
1108 # if self.gpio and self.xics:
1109 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1110
1111 # instruction decoder
1112 pdecode = create_pdecode()
1113 m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1114 if self.svp64_en:
1115 m.submodules.svp64 = svp64 = csd(self.svp64)
1116
1117 # convenience
1118 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1119 intrf = self.core.regs.rf['int']
1120
1121 # clock delay power-on reset
1122 cd_por = ClockDomain(reset_less=True)
1123 cd_sync = ClockDomain()
1124 core_sync = ClockDomain("coresync")
1125 m.domains += cd_por, cd_sync, core_sync
1126 if self.dbg_domain != "sync":
1127 dbg_sync = ClockDomain(self.dbg_domain)
1128 m.domains += dbg_sync
1129
1130 ti_rst = Signal(reset_less=True)
1131 delay = Signal(range(4), reset=3)
1132 with m.If(delay != 0):
1133 m.d.por += delay.eq(delay - 1)
1134 comb += cd_por.clk.eq(ClockSignal())
1135
1136 # power-on reset delay
1137 core_rst = ResetSignal("coresync")
1138 comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())  # parenthesise: "|" binds tighter than "!="
1139 comb += core_rst.eq(ti_rst)
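# reset behaviour: ti_rst (and hence the coresync reset) is held while the
# power-on delay counter is still counting down, while the debug module
# requests a core reset, or while the external reset is asserted.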
1140
1141 # debug clock is same as coresync, but reset is *main external*
1142 if self.dbg_domain != "sync":
1143 dbg_rst = ResetSignal(self.dbg_domain)
1144 comb += dbg_rst.eq(ResetSignal())
1145
1146 # busy/halted signals from core
1147 core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1148 comb += self.busy_o.eq(core_busy_o)
1149 comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1150
1151 # temporary hack: says "go" immediately for both address gen and ST
1152 l0 = core.l0
1153 ldst = core.fus.fus['ldst0']
1154 st_go_edge = rising_edge(m, ldst.st.rel_o)
1155 # link addr-go direct to rel
1156 m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1157 m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1158
1159 def elaborate(self, platform):
1160 m = Module()
1161 # convenience
1162 comb, sync = m.d.comb, m.d.sync
1163 cur_state = self.cur_state
1164 pdecode2 = self.pdecode2
1165 dbg = self.dbg
1166 core = self.core
1167
1168 # set up peripherals and core
1169 core_rst = self.core_rst
1170 self.setup_peripherals(m)
1171
1172 # reset current state if core reset requested
1173 with m.If(core_rst):
1174 m.d.sync += self.cur_state.eq(0)
1175
1176 # PC and instruction from I-Memory
1177 comb += self.pc_o.eq(cur_state.pc)
1178 pc_changed = Signal() # note write to PC
1179 msr_changed = Signal() # note write to MSR
1180 sv_changed = Signal() # note write to SVSTATE
1181
1182 # indicate to outside world if any FU is still executing
1183 comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1184
1185 # read state either from incoming override or from regfile
1186 msr = state_get(m, core_rst, self.msr_i,
1187 "msr", # read MSR
1188 self.state_r_msr, StateRegs.MSR)
1189 pc = state_get(m, core_rst, self.pc_i,
1190 "pc", # read PC
1191 self.state_r_pc, StateRegs.PC)
1192 svstate = state_get(m, core_rst, self.svstate_i,
1193 "svstate", # read SVSTATE
1194 self.state_r_sv, StateRegs.SVSTATE)
1195
1196 # don't write pc every cycle
1197 comb += self.state_w_pc.wen.eq(0)
1198 comb += self.state_w_pc.i_data.eq(0)
1199
1200 # address of the next instruction, in the absence of a branch
1201 # depends on the instruction size
1202 nia = Signal(64)
1203
1204 # connect up debug signals
1205 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1206 comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1207 comb += dbg.state.pc.eq(pc)
1208 comb += dbg.state.svstate.eq(svstate)
1209 comb += dbg.state.msr.eq(msr)
1210
1211 # pass the prefix mode from Fetch to Issue, so the latter can loop
1212 # on VL==0
1213 is_svp64_mode = Signal()
1214
1215 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1216 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1217 # these are the handshake signals between each
1218
1219 # fetch FSM can run as soon as the PC is valid
1220 fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1221 fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1222
1223 # fetch FSM hands over the instruction to be decoded / issued
1224 fetch_insn_o_valid = Signal()
1225 fetch_insn_i_ready = Signal()
1226
1227 # predicate fetch FSM decodes and fetches the predicate
1228 pred_insn_i_valid = Signal()
1229 pred_insn_o_ready = Signal()
1230
1231 # predicate fetch FSM delivers the masks
1232 pred_mask_o_valid = Signal()
1233 pred_mask_i_ready = Signal()
1234
1235 # issue FSM delivers the instruction to be executed
1236 exec_insn_i_valid = Signal()
1237 exec_insn_o_ready = Signal()
1238
1239 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1240 exec_pc_o_valid = Signal()
1241 exec_pc_i_ready = Signal()
1242
1243 # the FSMs here are perhaps unusual in that they detect conditions
1244 # then "hold" information, combinatorially, for the core
1245 # (as opposed to using sync - which would be on a clock's delay)
1246 # this includes the actual opcode, valid flags and so on.
1247
1248 # Fetch, then predicate fetch, then Issue, then Execute.
1249 # Issue is where the VL for-loop lives. the ready/valid
1250 # signalling is used to communicate between the four.
1251
1252 # set up Fetch FSM
1253 fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1254 self.imem, core_rst, pdecode2, cur_state,
1255 dbg, core, svstate, nia, is_svp64_mode)
1256 m.submodules.fetch = fetch
1257 # connect up in/out data to existing Signals
1258 comb += fetch.p.i_data.pc.eq(pc)
1259 comb += fetch.p.i_data.msr.eq(msr)
1260 # and the ready/valid signalling
1261 comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1262 comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1263 comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1264 comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1265
1266 self.issue_fsm(m, core, msr_changed, pc_changed, sv_changed, nia,
1267 dbg, core_rst, is_svp64_mode,
1268 fetch_pc_o_ready, fetch_pc_i_valid,
1269 fetch_insn_o_valid, fetch_insn_i_ready,
1270 pred_insn_i_valid, pred_insn_o_ready,
1271 pred_mask_o_valid, pred_mask_i_ready,
1272 exec_insn_i_valid, exec_insn_o_ready,
1273 exec_pc_o_valid, exec_pc_i_ready)
1274
1275 if self.svp64_en:
1276 self.fetch_predicate_fsm(m,
1277 pred_insn_i_valid, pred_insn_o_ready,
1278 pred_mask_o_valid, pred_mask_i_ready)
1279
1280 self.execute_fsm(m, core, msr_changed, pc_changed, sv_changed,
1281 exec_insn_i_valid, exec_insn_o_ready,
1282 exec_pc_o_valid, exec_pc_i_ready)
1283
1284 # this bit doesn't have to be in the FSM: connect up to read
1285 # regfiles on demand from DMI
1286 self.do_dmi(m, dbg)
1287
1288 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
1289 # (which uses that in PowerDecoder2 to raise 0x900 exception)
1290 self.tb_dec_fsm(m, cur_state.dec)
1291
1292 return m
1293
1294 def do_dmi(self, m, dbg):
1295 """deals with DMI debug requests
1296
1297 currently only provides read requests for the INT regfile, CR and XER
1298 it will later also deal with *writing* to these regfiles.
1299 """
1300 comb = m.d.comb
1301 sync = m.d.sync
1302 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1303 intrf = self.core.regs.rf['int']
1304
1305 with m.If(d_reg.req): # request for regfile access being made
1306 # TODO: error-check this
1307 # XXX should this be combinatorial? sync better?
1308 if intrf.unary:
1309 comb += self.int_r.ren.eq(1 << d_reg.addr)
1310 else:
1311 comb += self.int_r.addr.eq(d_reg.addr)
1312 comb += self.int_r.ren.eq(1)
1313 d_reg_delay = Signal()
1314 sync += d_reg_delay.eq(d_reg.req)
1315 with m.If(d_reg_delay):
1316 # data arrives one clock later
1317 comb += d_reg.data.eq(self.int_r.o_data)
1318 comb += d_reg.ack.eq(1)
1319
1320 # sigh same thing for CR debug
1321 with m.If(d_cr.req): # request for regfile access being made
1322 comb += self.cr_r.ren.eq(0b11111111) # enable all
1323 d_cr_delay = Signal()
1324 sync += d_cr_delay.eq(d_cr.req)
1325 with m.If(d_cr_delay):
1326 # data arrives one clock later
1327 comb += d_cr.data.eq(self.cr_r.o_data)
1328 comb += d_cr.ack.eq(1)
1329
1330 # aaand XER...
1331 with m.If(d_xer.req): # request for regfile access being made
1332 comb += self.xer_r.ren.eq(0b111111) # enable all
1333 d_xer_delay = Signal()
1334 sync += d_xer_delay.eq(d_xer.req)
1335 with m.If(d_xer_delay):
1336 # data arrives one clock later
1337 comb += d_xer.data.eq(self.xer_r.o_data)
1338 comb += d_xer.ack.eq(1)
1339
1340 def tb_dec_fsm(self, m, spr_dec):
1341 """tb_dec_fsm
1342
1343 this is an FSM for updating either dec or tb. it runs alternately
1344 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
1345 value to DEC, however the regfile has "passthrough" on it so this
1346 *should* be ok.
1347
1348 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
1349 """
1350
1351 comb, sync = m.d.comb, m.d.sync
1352 fast_rf = self.core.regs.rf['fast']
1353 fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1354 fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1355
1356 with m.FSM() as fsm:
1357
1358 # initiates read of current DEC
1359 with m.State("DEC_READ"):
1360 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1361 comb += fast_r_dectb.ren.eq(1)
1362 m.next = "DEC_WRITE"
1363
1364 # waits for DEC read to arrive (1 cycle), updates with new value
1365 with m.State("DEC_WRITE"):
1366 new_dec = Signal(64)
1367 # TODO: MSR.LPCR 32-bit decrement mode
1368 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1369 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1370 comb += fast_w_dectb.wen.eq(1)
1371 comb += fast_w_dectb.i_data.eq(new_dec)
1372 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1373 m.next = "TB_READ"
1374
1375 # initiates read of current TB
1376 with m.State("TB_READ"):
1377 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1378 comb += fast_r_dectb.ren.eq(1)
1379 m.next = "TB_WRITE"
1380
1381 # waits for read TB to arrive, initiates write of current TB
1382 with m.State("TB_WRITE"):
1383 new_tb = Signal(64)
1384 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1385 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1386 comb += fast_w_dectb.wen.eq(1)
1387 comb += fast_w_dectb.i_data.eq(new_tb)
1388 m.next = "DEC_READ"
1389
1390 return m
1391
1392 def __iter__(self):
1393 yield from self.pc_i.ports()
1394 yield from self.msr_i.ports()
1395 yield self.pc_o
1396 yield self.memerr_o
1397 yield from self.core.ports()
1398 yield from self.imem.ports()
1399 yield self.core_bigendian_i
1400 yield self.busy_o
1401
1402 def ports(self):
1403 return list(self)
1404
1405 def external_ports(self):
1406 ports = self.pc_i.ports()
1407 ports += self.msr_i.ports()
1408 ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1409 ]
1410
1411 if self.jtag_en:
1412 ports += list(self.jtag.external_ports())
1413 else:
1414 # don't add DMI if JTAG is enabled
1415 ports += list(self.dbg.dmi.ports())
1416
1417 ports += list(self.imem.ibus.fields.values())
1418 ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1419
1420 if self.sram4x4k:
1421 for sram in self.sram4k:
1422 ports += list(sram.bus.fields.values())
1423
1424 if self.xics:
1425 ports += list(self.xics_icp.bus.fields.values())
1426 ports += list(self.xics_ics.bus.fields.values())
1427 ports.append(self.int_level_i)
1428
1429 if self.gpio:
1430 ports += list(self.simple_gpio.bus.fields.values())
1431 ports.append(self.gpio_o)
1432
1433 return ports
1434
1437
1438
1439 class TestIssuer(Elaboratable):
1440 def __init__(self, pspec):
1441 self.ti = TestIssuerInternal(pspec)
1442 self.pll = DummyPLL(instance=True)
1443
1444 # PLL direct clock or not
1445 self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1446 if self.pll_en:
1447 self.pll_test_o = Signal(reset_less=True)
1448 self.pll_vco_o = Signal(reset_less=True)
1449 self.clk_sel_i = Signal(2, reset_less=True)
1450 self.ref_clk = ClockSignal() # can't rename it but that's ok
1451 self.pllclk_clk = ClockSignal("pllclk")
1452
1453 def elaborate(self, platform):
1454 m = Module()
1455 comb = m.d.comb
1456
1457 # TestIssuer nominally runs at main clock, actually it is
1458 # all combinatorial internally except for coresync'd components
1459 m.submodules.ti = ti = self.ti
1460
1461 if self.pll_en:
1462 # ClockSelect runs at PLL output internal clock rate
1463 m.submodules.wrappll = pll = self.pll
1464
1465 # add clock domains from PLL
1466 cd_pll = ClockDomain("pllclk")
1467 m.domains += cd_pll
1468
1469 # PLL clock established. has the side-effect of running clksel
1470 # at the PLL's speed (see DomainRenamer("pllclk") above)
1471 pllclk = self.pllclk_clk
1472 comb += pllclk.eq(pll.clk_pll_o)
1473
1474 # wire up external 24mhz to PLL
1475 #comb += pll.clk_24_i.eq(self.ref_clk)
1476 # output 18 mhz PLL test signal, and analog oscillator out
1477 comb += self.pll_test_o.eq(pll.pll_test_o)
1478 comb += self.pll_vco_o.eq(pll.pll_vco_o)
1479
1480 # input to pll clock selection
1481 comb += pll.clk_sel_i.eq(self.clk_sel_i)
1482
1483 # now wire up ResetSignals. don't mind them being in this domain
1484 pll_rst = ResetSignal("pllclk")
1485 comb += pll_rst.eq(ResetSignal())
1486
1487 # internal clock is set to selector clock-out. has the side-effect of
1488 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1489 # debug clock runs at coresync internal clock
1490 cd_coresync = ClockDomain("coresync")
1491 #m.domains += cd_coresync
1492 if self.ti.dbg_domain != 'sync':
1493 cd_dbgsync = ClockDomain("dbgsync")
1494 #m.domains += cd_dbgsync
1495 intclk = ClockSignal("coresync")
1496 dbgclk = ClockSignal(self.ti.dbg_domain)
1497 # XXX BYPASS PLL XXX
1498 # XXX BYPASS PLL XXX
1499 # XXX BYPASS PLL XXX
1500 if self.pll_en:
1501 comb += intclk.eq(self.ref_clk)
1502 else:
1503 comb += intclk.eq(ClockSignal())
1504 if self.ti.dbg_domain != 'sync':
1505 dbgclk = ClockSignal(self.ti.dbg_domain)
1506 comb += dbgclk.eq(intclk)
1507
1508 return m
1509
1510 def ports(self):
1511 return list(self.ti.ports()) + list(self.pll.ports()) + \
1512 [ClockSignal(), ResetSignal()]
1513
1514 def external_ports(self):
1515 ports = self.ti.external_ports()
1516 ports.append(ClockSignal())
1517 ports.append(ResetSignal())
1518 if self.pll_en:
1519 ports.append(self.clk_sel_i)
1520 ports.append(self.pll.clk_24_i)
1521 ports.append(self.pll_test_o)
1522 ports.append(self.pll_vco_o)
1523 ports.append(self.pllclk_clk)
1524 ports.append(self.ref_clk)
1525 return ports
1526
1527
1528 if __name__ == '__main__':
1529 units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1530 'spr': 1,
1531 'div': 1,
1532 'mul': 1,
1533 'shiftrot': 1
1534 }
1535 pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1536 imem_ifacetype='bare_wb',
1537 addr_wid=48,
1538 mask_wid=8,
1539 reg_wid=64,
1540 units=units)
1541 dut = TestIssuer(pspec)
1542 vl = main(dut, ports=dut.ports(), name="test_issuer")
1543
1544 if len(sys.argv) == 1:
1545 vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1546 with open("test_issuer.il", "w") as f:
1547 f.write(vl)