src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch state machine, packaged as a ControlBase.

    The object acts as its own stage (it is passed as stage= to
    ControlBase): ispec() supplies a FetchInput, ospec() a FetchOutput.
    elaborate() reads the PC from the input spec (self.i.pc) and drives
    the prev/next ready/valid handshake signals of self.p and self.n.

    NOTE(review): when svp64_en is set, elaborate() reads self.svp64 and
    self.core_bigendian_i, neither of which is assigned in __init__.
    presumably the creator attaches them before elaboration - confirm.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        # keep references to everything handed in: all of these are
        # used later on, by elaborate()
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        # short-hand aliases for the input and output data
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        # nothing to set up
        pass

    def ispec(self):
        # input data specification
        return FetchInput()

    def ospec(self):
        # output data specification
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE
        """
        m = super().elaborate(platform)

        # local short-hands for the objects stored by __init__ and for
        # the ready/valid handshake signals of the two ControlBase ports
        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # cleared when an MSR read is launched (IDLE), set again once the
        # value has been captured (INSN_READ)
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
        else:
            # no icache: fetch can never be flagged as failed
            fetch_failed = Const(0, 1)
            flush_needed = False

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default is "no read"; the IDLE state raises the enable below
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only advertise readiness when not stopping and not failed
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # the debug "stopping" request is only honoured in this
                # state when overlap is allowed
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and
                            # self.core_bigendian_i are not set by
                            # __init__ - see the class docstring
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second half of a prefixed instruction: the word at pc+4
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # (pmode is not used yet: the string below is only a
                    # sketch of the intended logic, not executed code)
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                # stay here until the receiving side accepts it
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
 355
 356
 357 class TestIssuerInternal(Elaboratable):
 358     """TestIssuer - reads instructions from TestMemory and issues them
 359
 360     efficiency and speed is not the main goal here: functional correctness
 361     and code clarity is.  optimisations (which almost 100% interfere with
 362     easy understanding) come later.
 363     """
 364
 365     def __init__(self, pspec):
 366
 367         # test is SVP64 is to be enabled
 368         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 369
 370         # and if regfiles are reduced
 371         self.regreduce_en = (hasattr(pspec, "regreduce") and
 372                              (pspec.regreduce == True))
 373
 374         # and if overlap requested
 375         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 376                               (pspec.allow_overlap == True))
 377
 378         # JTAG interface.  add this right at the start because if it's
 379         # added it *modifies* the pspec, by adding enable/disable signals
 380         # for parts of the rest of the core
 381         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 382         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 383         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 384         if self.jtag_en:
 385             # XXX MUST keep this up-to-date with litex, and
 386             # soc-cocotb-sim, and err.. all needs sorting out, argh
 387             subset = ['uart',
 388                       'mtwi',
 389                       'eint', 'gpio', 'mspi0',
 390                       # 'mspi1', - disabled for now
 391                       # 'pwm', 'sd0', - disabled for now
 392                       'sdr']
 393             self.jtag = JTAG(get_pinspecs(subset=subset),
 394                              domain=self.dbg_domain)
 395             # add signals to pspec to enable/disable icache and dcache
 396             # (or data and intstruction wishbone if icache/dcache not included)
 397             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 398             # TODO: do we actually care if these are not domain-synchronised?
 399             # honestly probably not.
 400             pspec.wb_icache_en = self.jtag.wb_icache_en
 401             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 402             self.wb_sram_en = self.jtag.wb_sram_en
 403         else:
 404             self.wb_sram_en = Const(1)
 405
 406         # add 4k sram blocks?
 407         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 408                          pspec.sram4x4kblock == True)
 409         if self.sram4x4k:
 410             self.sram4k = []
 411             for i in range(4):
 412                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 413                                                     # features={'err'}
 414                                                     ))
 415
 416         # add interrupt controller?
 417         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 418         if self.xics:
 419             self.xics_icp = XICS_ICP()
 420             self.xics_ics = XICS_ICS()
 421             self.int_level_i = self.xics_ics.int_level_i
 422
 423         # add GPIO peripheral?
 424         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 425         if self.gpio:
 426             self.simple_gpio = SimpleGPIO()
 427             self.gpio_o = self.simple_gpio.gpio_o
 428
 429         # main instruction core.  suitable for prototyping / demo only
 430         self.core = core = NonProductionCore(pspec)
 431         self.core_rst = ResetSignal("coresync")
 432
 433         # instruction decoder.  goes into Trap Record
 434         #pdecode = create_pdecode()
 435         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 436         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 437                                      opkls=IssuerDecode2ToOperand,
 438                                      svp64_en=self.svp64_en,
 439                                      regreduce_en=self.regreduce_en)
 440         pdecode = self.pdecode2.dec
 441
 442         if self.svp64_en:
 443             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 444
 445         # Test Instruction memory
 446         if hasattr(core, "icache"):
 447             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 448             # truly dreadful.  needs a huge reorg.
 449             pspec.icache = core.icache
 450         self.imem = ConfigFetchUnit(pspec).fu
 451
 452         # DMI interface
 453         self.dbg = CoreDebug()
 454
 455         # instruction go/monitor
 456         self.pc_o = Signal(64, reset_less=True)
 457         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 458         self.svstate_i = Data(64, "svstate_i")  # ditto
 459         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 460         self.busy_o = Signal(reset_less=True)
 461         self.memerr_o = Signal(reset_less=True)
 462
 463         # STATE regfile read /write ports for PC, MSR, SVSTATE
 464         staterf = self.core.regs.rf['state']
 465         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 466         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 467         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 468         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 469
 470         # DMI interface access
 471         intrf = self.core.regs.rf['int']
 472         crrf = self.core.regs.rf['cr']
 473         xerrf = self.core.regs.rf['xer']
 474         self.int_r = intrf.r_ports['dmi']  # INT read
 475         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 476         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 477
 478         if self.svp64_en:
 479             # for predication
 480             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 481             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 482
 483         # hack method of keeping an eye on whether branch/trap set the PC
 484         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 485         self.state_nia.wen.name = 'state_nia_wen'
 486
 487         # pulse to synchronize the simulator at instruction end
 488         self.insn_done = Signal()
 489
 490         # indicate any instruction still outstanding, in execution
 491         self.any_busy = Signal()
 492
 493         if self.svp64_en:
 494             # store copies of predicate masks
 495             self.srcmask = Signal(64)
 496             self.dstmask = Signal(64)
 497
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        handshake signals, as used below:

        * pred_insn_o_ready is raised while idle; pred_insn_i_valid
          (an input) then starts a fetch
        * pred_mask_o_valid is raised once self.srcmask / self.dstmask
          are stored; pred_mask_i_ready (an input) returns the FSM to idle

        the FSM is added to module m; nothing is returned.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are
        # decoded; predmode decides, below, which one gets used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (both masks are built up bit by bit, from zero)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: all-ones masks,
                        # stored directly (no shifting step)
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is an ordinary Python variable, so it is
                    # also the one referred to by the Else and If below)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are stored: announce them, wait for acceptance
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 666
 667     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 668                   dbg, core_rst, is_svp64_mode,
 669                   fetch_pc_o_ready, fetch_pc_i_valid,
 670                   fetch_insn_o_valid, fetch_insn_i_ready,
 671                   pred_insn_i_valid, pred_insn_o_ready,
 672                   pred_mask_o_valid, pred_mask_i_ready,
 673                   exec_insn_i_valid, exec_insn_o_ready,
 674                   exec_pc_o_valid, exec_pc_i_ready):
 675         """issue FSM
 676
 677         decode / issue FSM.  this interacts with the "fetch" FSM
 678         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 679         (outgoing). also interacts with the "execute" FSM
 680         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 681         (incoming).
 682         SVP64 RM prefixes have already been set up by the
 683         "fetch" phase, so execute is fairly straightforward.
 684         """
 685
 686         comb = m.d.comb
 687         sync = m.d.sync
 688         pdecode2 = self.pdecode2
 689         cur_state = self.cur_state
 690
 691         # temporaries
 692         dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode
 693
 694         # for updating svstate (things like srcstep etc.)
 695         update_svstate = Signal()  # set this (below) if updating
 696         new_svstate = SVSTATERec("new_svstate")
 697         comb += new_svstate.eq(cur_state.svstate)
 698
 699         # precalculate srcstep+1 and dststep+1
 700         cur_srcstep = cur_state.svstate.srcstep
 701         cur_dststep = cur_state.svstate.dststep
 702         next_srcstep = Signal.like(cur_srcstep)
 703         next_dststep = Signal.like(cur_dststep)
 704         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 705         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 706
 707         # note if an exception happened.  in a pipelined or OoO design
 708         # this needs to be accompanied by "shadowing" (or stalling)
 709         exc_happened = self.core.o.exc_happened
 710         # also note instruction fetch failed
 711         if hasattr(core, "icache"):
 712             fetch_failed = core.icache.i_out.fetch_failed
 713             flush_needed = True
 714             # set to fault in decoder
 715             # update (highest priority) instruction fault
 716             rising_fetch_failed = rising_edge(m, fetch_failed)
 717             with m.If(rising_fetch_failed):
 718                 sync += pdecode2.instr_fault.eq(1)
 719         else:
 720             fetch_failed = Const(0, 1)
 721             flush_needed = False
 722
 723         with m.FSM(name="issue_fsm"):
 724
 725             # sync with the "fetch" phase which is reading the instruction
 726             # at this point, there is no instruction running, that
 727             # could inadvertently update the PC.
 728             with m.State("ISSUE_START"):
 729                 # reset instruction fault
 730                 sync += pdecode2.instr_fault.eq(0)
 731                 # wait on "core stop" release, before next fetch
 732                 # need to do this here, in case we are in a VL==0 loop
 733                 with m.If(~dbg.core_stop_o & ~core_rst):
 734                     comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
 735                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 736                         m.next = "INSN_WAIT"
 737                 with m.Else():
 738                     # tell core it's stopped, and acknowledge debug handshake
 739                     comb += dbg.core_stopped_i.eq(1)
 740                     # while stopped, allow updating the PC and SVSTATE
 741                     with m.If(self.pc_i.ok):
 742                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 743                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 744                         sync += pc_changed.eq(1)
 745                     with m.If(self.svstate_i.ok):
 746                         comb += new_svstate.eq(self.svstate_i.data)
 747                         comb += update_svstate.eq(1)
 748                         sync += sv_changed.eq(1)
 749
 750             # wait for an instruction to arrive from Fetch
 751             with m.State("INSN_WAIT"):
 752                 if self.allow_overlap:
 753                     stopping = dbg.stopping_o
 754                 else:
 755                     stopping = Const(0)
 756                 with m.If(stopping):
 757                     # stopping: jump back to idle
 758                     m.next = "ISSUE_START"
 759                     if flush_needed:
 760                         # request the icache to stop asserting "failed"
 761                         comb += core.icache.flush_in.eq(1)
 762                     # stop instruction fault
 763                     sync += pdecode2.instr_fault.eq(0)
 764                 with m.Else():
 765                     comb += fetch_insn_i_ready.eq(1)
 766                     with m.If(fetch_insn_o_valid):
 767                         # loop into ISSUE_START if it's a SVP64 instruction
 768                         # and VL == 0.  this because VL==0 is a for-loop
 769                         # from 0 to 0 i.e. always, always a NOP.
 770                         cur_vl = cur_state.svstate.vl
 771                         with m.If(is_svp64_mode & (cur_vl == 0)):
 772                             # update the PC before fetching the next instruction
 773                             # since we are in a VL==0 loop, no instruction was
 774                             # executed that we could be overwriting
 775                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 776                             comb += self.state_w_pc.i_data.eq(nia)
 777                             comb += self.insn_done.eq(1)
 778                             m.next = "ISSUE_START"
 779                         with m.Else():
 780                             if self.svp64_en:
 781                                 m.next = "PRED_START"  # fetching predicate
 782                             else:
 783                                 m.next = "DECODE_SV"  # skip predication
 784
 785             with m.State("PRED_START"):
 786                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 787                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 788                     m.next = "MASK_WAIT"
 789
 790             with m.State("MASK_WAIT"):
 791                 comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
 792                 with m.If(pred_mask_o_valid):  # predication masks are ready
 793                     m.next = "PRED_SKIP"
 794
 795             # skip zeros in predicate
 796             with m.State("PRED_SKIP"):
 797                 with m.If(~is_svp64_mode):
 798                     m.next = "DECODE_SV"  # nothing to do
 799                 with m.Else():
 800                     if self.svp64_en:
 801                         pred_src_zero = pdecode2.rm_dec.pred_sz
 802                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 803
 804                         # new srcstep, after skipping zeros
 805                         skip_srcstep = Signal.like(cur_srcstep)
 806                         # value to be added to the current srcstep
 807                         src_delta = Signal.like(cur_srcstep)
 808                         # add leading zeros to srcstep, if not in zero mode
 809                         with m.If(~pred_src_zero):
 810                             # priority encoder (count leading zeros)
 811                             # append guard bit, in case the mask is all zeros
 812                             pri_enc_src = PriorityEncoder(65)
 813                             m.submodules.pri_enc_src = pri_enc_src
 814                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 815                                                          Const(1, 1)))
 816                             comb += src_delta.eq(pri_enc_src.o)
 817                         # apply delta to srcstep
 818                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 819                         # shift-out all leading zeros from the mask
 820                         # plus the leading "one" bit
 821                         # TODO count leading zeros and shift-out the zero
 822                         #      bits, in the same step, in hardware
 823                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 824
 825                         # same as above, but for dststep
 826                         skip_dststep = Signal.like(cur_dststep)
 827                         dst_delta = Signal.like(cur_dststep)
 828                         with m.If(~pred_dst_zero):
 829                             pri_enc_dst = PriorityEncoder(65)
 830                             m.submodules.pri_enc_dst = pri_enc_dst
 831                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 832                                                          Const(1, 1)))
 833                             comb += dst_delta.eq(pri_enc_dst.o)
 834                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 835                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 836
 837                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 838                         with m.If((skip_srcstep >= cur_vl) |
 839                                   (skip_dststep >= cur_vl)):
 840                             # end of VL loop. Update PC and reset src/dst step
 841                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 842                             comb += self.state_w_pc.i_data.eq(nia)
 843                             comb += new_svstate.srcstep.eq(0)
 844                             comb += new_svstate.dststep.eq(0)
 845                             comb += update_svstate.eq(1)
 846                             # synchronize with the simulator
 847                             comb += self.insn_done.eq(1)
 848                             # go back to Issue
 849                             m.next = "ISSUE_START"
 850                         with m.Else():
 851                             # update new src/dst step
 852                             comb += new_svstate.srcstep.eq(skip_srcstep)
 853                             comb += new_svstate.dststep.eq(skip_dststep)
 854                             comb += update_svstate.eq(1)
 855                             # proceed to Decode
 856                             m.next = "DECODE_SV"
 857
 858                         # pass predicate mask bits through to satellite decoders
 859                         # TODO: for SIMD this will be *multiple* bits
 860                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 861                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 862
 863             # after src/dst step have been updated, we are ready
 864             # to decode the instruction
 865             with m.State("DECODE_SV"):
 866                 # decode the instruction
 867                 with m.If(~fetch_failed):
 868                     sync += pdecode2.instr_fault.eq(0)
 869                 sync += core.i.e.eq(pdecode2.e)
 870                 sync += core.i.state.eq(cur_state)
 871                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 872                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 873                 if self.svp64_en:
 874                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 875                     # set RA_OR_ZERO detection in satellite decoders
 876                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 877                     # and svp64 detection
 878                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 879                     # and svp64 bit-rev'd ldst mode
 880                     ldst_dec = pdecode2.use_svp64_ldst_dec
 881                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 882                 # after decoding, reset any previous exception condition,
 883                 # allowing it to be set again during the next execution
 884                 sync += pdecode2.ldst_exc.eq(0)
 885
 886                 m.next = "INSN_EXECUTE"  # move to "execute"
 887
 888             # handshake with execution FSM, move to "wait" once acknowledged
 889             with m.State("INSN_EXECUTE"):
 890                 comb += exec_insn_i_valid.eq(1)  # trigger execute
 891                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 892                     m.next = "EXECUTE_WAIT"
 893
 894             with m.State("EXECUTE_WAIT"):
 895                 # wait on "core stop" release, at instruction end
 896                 # need to do this here, in case we are in a VL>1 loop
 897                 with m.If(~dbg.core_stop_o & ~core_rst):
 898                     comb += exec_pc_i_ready.eq(1)
 899                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 900                     # the exception info needs to be blatted into
 901                     # pdecode.ldst_exc, and the instruction "re-run".
 902                     # when ldst_exc.happened is set, the PowerDecoder2
 903                     # reacts very differently: it re-writes the instruction
 904                     # with a "trap" (calls PowerDecoder2.trap()) which
 905                     # will *overwrite* whatever was requested and jump the
 906                     # PC to the exception address, as well as alter MSR.
 907                     # nothing else needs to be done other than to note
 908                     # the change of PC and MSR (and, later, SVSTATE)
 909                     with m.If(exc_happened):
 910                         mmu = core.fus.get_exc("mmu0")
 911                         ldst = core.fus.get_exc("ldst0")
 912                         if mmu is not None:
 913                             with m.If(fetch_failed):
 914                                 # instruction fetch: exception is from MMU
 915                                 # reset instr_fault (highest priority)
 916                                 sync += pdecode2.ldst_exc.eq(mmu)
 917                                 sync += pdecode2.instr_fault.eq(0)
 918                                 if flush_needed:
 919                                     # request icache to stop asserting "failed"
 920                                     comb += core.icache.flush_in.eq(1)
 921                         with m.If(~fetch_failed):
 922                             # otherwise assume it was a LDST exception
 923                             sync += pdecode2.ldst_exc.eq(ldst)
 924
 925                     with m.If(exec_pc_o_valid):
 926
 927                         # was this the last loop iteration?
 928                         is_last = Signal()
 929                         cur_vl = cur_state.svstate.vl
 930                         comb += is_last.eq(next_srcstep == cur_vl)
 931
 932                         # return directly to Decode if Execute generated an
 933                         # exception.
 934                         with m.If(pdecode2.ldst_exc.happened):
 935                             m.next = "DECODE_SV"
 936
 937                         # if either PC or SVSTATE were changed by the previous
 938                         # instruction, go directly back to Fetch, without
 939                         # updating either PC or SVSTATE
 940                         with m.Elif(pc_changed | sv_changed):
 941                             m.next = "ISSUE_START"
 942
 943                         # also return to Fetch, when no output was a vector
 944                         # (regardless of SRCSTEP and VL), or when the last
 945                         # instruction was really the last one of the VL loop
 946                         with m.Elif((~pdecode2.loop_continue) | is_last):
 947                             # before going back to fetch, update the PC state
 948                             # register with the NIA.
 949                             # ok here we are not reading the branch unit.
 950                             # TODO: this just blithely overwrites whatever
 951                             #       pipeline updated the PC
 952                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 953                             comb += self.state_w_pc.i_data.eq(nia)
 954                             # reset SRCSTEP before returning to Fetch
 955                             if self.svp64_en:
 956                                 with m.If(pdecode2.loop_continue):
 957                                     comb += new_svstate.srcstep.eq(0)
 958                                     comb += new_svstate.dststep.eq(0)
 959                                     comb += update_svstate.eq(1)
 960                             else:
 961                                 comb += new_svstate.srcstep.eq(0)
 962                                 comb += new_svstate.dststep.eq(0)
 963                                 comb += update_svstate.eq(1)
 964                             m.next = "ISSUE_START"
 965
 966                         # returning to Execute? then, first update SRCSTEP
 967                         with m.Else():
 968                             comb += new_svstate.srcstep.eq(next_srcstep)
 969                             comb += new_svstate.dststep.eq(next_dststep)
 970                             comb += update_svstate.eq(1)
 971                             # return to mask skip loop
 972                             m.next = "PRED_SKIP"
 973
 974                 with m.Else():
 975                     comb += dbg.core_stopped_i.eq(1)
 976                     if flush_needed:
 977                         # request the icache to stop asserting "failed"
 978                         comb += core.icache.flush_in.eq(1)
 979                     # stop instruction fault
 980                     sync += pdecode2.instr_fault.eq(0)
 981                     if flush_needed:
 982                         # request the icache to stop asserting "failed"
 983                         comb += core.icache.flush_in.eq(1)
 984                     # stop instruction fault
 985                     sync += pdecode2.instr_fault.eq(0)
 986                     # while stopped, allow updating the PC and SVSTATE
 987                     with m.If(self.pc_i.ok):
 988                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 989                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 990                         sync += pc_changed.eq(1)
 991                     with m.If(self.svstate_i.ok):
 992                         comb += new_svstate.eq(self.svstate_i.data)
 993                         comb += update_svstate.eq(1)
 994                         sync += sv_changed.eq(1)
 995
 996         # check if svstate needs updating: if so, write it to State Regfile
 997         with m.If(update_svstate):
 998             comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
 999             comb += self.state_w_sv.i_data.eq(new_svstate)
1000             sync += cur_state.svstate.eq(new_svstate)  # for next clock
1001
    def execute_fsm(self, m, core, pc_changed, sv_changed,
                    exec_insn_i_valid, exec_insn_o_ready,
                    exec_pc_o_valid, exec_pc_i_ready):
        """execute FSM

        execute FSM. this interacts with the "issue" FSM
        through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
        (outgoing). SVP64 RM prefixes have already been set up by the
        "issue" phase, so execute is fairly straightforward.

        states (added to Module "m" as an FSM named "exec_fsm"):

        * INSN_START  - advertise readiness to "issue".  when an instruction
                        is offered, raise the core's i_valid, clear
                        pc_changed/sv_changed, and move on once the core
                        accepts it (core.p.o_ready)
        * INSN_ACTIVE - record writes to PC / SVSTATE (state_nia.wen) in
                        pc_changed / sv_changed, and when the core is no
                        longer busy hand back to "issue"

        parameters:

        * m                  - the Module to add logic to
        * core               - the core being driven (p.i_valid, p.o_ready
                               and n.o_data.busy_o are used; "icache", if
                               present, supplies fetch_failed)
        * pc_changed,
          sv_changed         - flags noting that PC / SVSTATE were written
        * exec_insn_i_valid,
          exec_insn_o_ready  - handshake: instruction arriving from issue
        * exec_pc_o_valid,
          exec_pc_i_ready    - handshake: completion reported back to issue
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2

        # temporaries
        core_busy_o = core.n.o_data.busy_o  # core is busy
        core_ivalid_i = core.p.i_valid              # instruction is valid

        # without an icache, an instruction fetch can never be flagged
        # as failed: substitute a constant zero
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        with m.FSM(name="exec_fsm"):

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_START"):
                comb += exec_insn_o_ready.eq(1)
                with m.If(exec_insn_i_valid):
                    comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                    # new instruction: forget any earlier PC/SVSTATE writes
                    sync += sv_changed.eq(0)
                    sync += pc_changed.eq(0)
                    with m.If(core.p.o_ready):  # only move if accepted
                        m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                # note changes to PC and SVSTATE
                with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                    sync += sv_changed.eq(1)
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    comb += exec_pc_o_valid.eq(1)
                    with m.If(exec_pc_i_ready):
                        # when finished, indicate "done".
                        # however, if there was an exception, the instruction
                        # is *not* yet done.  this is an implementation
                        # detail: we choose to implement exceptions by
                        # taking the exception information from the LDST
                        # unit, putting that *back* into the PowerDecoder2,
                        # and *re-running the entire instruction*.
                        # if we erroneously indicate "done" here, it is as if
                        # there were *TWO* instructions:
                        # 1) the failed LDST 2) a TRAP.
                        with m.If(~pdecode2.ldst_exc.happened &
                                  ~fetch_failed):
                            comb += self.insn_done.eq(1)
                        m.next = "INSN_START"  # back to fetch
1062
1063     def setup_peripherals(self, m):
1064         comb, sync = m.d.comb, m.d.sync
1065
1066         # okaaaay so the debug module must be in coresync clock domain
1067         # but NOT its reset signal. to cope with this, set every single
1068         # submodule explicitly in coresync domain, debug and JTAG
1069         # in their own one but using *external* reset.
1070         csd = DomainRenamer("coresync")
1071         dbd = DomainRenamer(self.dbg_domain)
1072
1073         m.submodules.core = core = csd(self.core)
1074         # this _so_ needs sorting out.  ICache is added down inside
1075         # LoadStore1 and is already a submodule of LoadStore1
1076         if not isinstance(self.imem, ICache):
1077             m.submodules.imem = imem = csd(self.imem)
1078         m.submodules.dbg = dbg = dbd(self.dbg)
1079         if self.jtag_en:
1080             m.submodules.jtag = jtag = dbd(self.jtag)
1081             # TODO: UART2GDB mux, here, from external pin
1082             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1083             sync += dbg.dmi.connect_to(jtag.dmi)
1084
1085         cur_state = self.cur_state
1086
1087         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1088         if self.sram4x4k:
1089             for i, sram in enumerate(self.sram4k):
1090                 m.submodules["sram4k_%d" % i] = csd(sram)
1091                 comb += sram.enable.eq(self.wb_sram_en)
1092
1093         # XICS interrupt handler
1094         if self.xics:
1095             m.submodules.xics_icp = icp = csd(self.xics_icp)
1096             m.submodules.xics_ics = ics = csd(self.xics_ics)
1097             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1098             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1099
1100         # GPIO test peripheral
1101         if self.gpio:
1102             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1103
1104         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1105         # XXX causes litex ECP5 test to get wrong idea about input and output
1106         # (but works with verilator sim *sigh*)
1107         # if self.gpio and self.xics:
1108         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1109
1110         # instruction decoder
1111         pdecode = create_pdecode()
1112         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1113         if self.svp64_en:
1114             m.submodules.svp64 = svp64 = csd(self.svp64)
1115
1116         # convenience
1117         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1118         intrf = self.core.regs.rf['int']
1119
1120         # clock delay power-on reset
1121         cd_por = ClockDomain(reset_less=True)
1122         cd_sync = ClockDomain()
1123         core_sync = ClockDomain("coresync")
1124         m.domains += cd_por, cd_sync, core_sync
1125         if self.dbg_domain != "sync":
1126             dbg_sync = ClockDomain(self.dbg_domain)
1127             m.domains += dbg_sync
1128
1129         ti_rst = Signal(reset_less=True)
1130         delay = Signal(range(4), reset=3)
1131         with m.If(delay != 0):
1132             m.d.por += delay.eq(delay - 1)
1133         comb += cd_por.clk.eq(ClockSignal())
1134
1135         # power-on reset delay
1136         core_rst = ResetSignal("coresync")
1137         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1138         comb += core_rst.eq(ti_rst)
1139
1140         # debug clock is same as coresync, but reset is *main external*
1141         if self.dbg_domain != "sync":
1142             dbg_rst = ResetSignal(self.dbg_domain)
1143             comb += dbg_rst.eq(ResetSignal())
1144
1145         # busy/halted signals from core
1146         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1147         comb += self.busy_o.eq(core_busy_o)
1148         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1149
1150         # temporary hack: says "go" immediately for both address gen and ST
1151         l0 = core.l0
1152         ldst = core.fus.fus['ldst0']
1153         st_go_edge = rising_edge(m, ldst.st.rel_o)
1154         # link addr-go direct to rel
1155         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1156         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1157
    def elaborate(self, platform):
        """builds and returns the top-level Module for the issuer

        in order: sets up the peripherals and core, reads PC and SVSTATE
        (from override inputs or the State Regfile), connects the debug
        state, then creates the handshake signals and the Fetch, Issue,
        (optional, SVP64-only) Predicate-fetch and Execute FSMs, followed
        by the DMI regfile access logic and the DEC/TB FSM.

        note that the order of statements matters here: the default
        (zero) assignments to state_w_pc must be added before issue_fsm
        is called, so that the FSM's assignments take precedence.

        "platform" is not used.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal()  # note write to PC
        sv_changed = Signal()  # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are defaults: the issue FSM overrides them when needed)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(pc)
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1288
    def do_dmi(self, m, dbg):
        """deals with DMI debug requests

        currently only provides read requests for the INT regfile, CR and XER
        it will later also deal with *writing* to these regfiles.

        * m   - the Module being built: statements are added to its comb
                and sync domains
        * dbg - debug object providing the d_gpr, d_cr and d_xer request
                interfaces (req / data / ack are used on each of them,
                plus addr on d_gpr)

        all three requests follow the same pattern: while req is raised,
        the read-enable of the matching read port (self.int_r, self.cr_r,
        self.xer_r) is asserted combinatorially.  a one-clock delayed copy
        of req then gates the read data and the ack back to the requester.
        nothing is returned.
        """
        comb = m.d.comb
        sync = m.d.sync
        # NOTE(review): dmi is unpacked here but never used in this method
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            # plain python "if": the choice between the two addressing
            # styles is made when the hardware is built, not at runtime
            if intrf.unary:
                # unary regfile: read-enable has one bit per register
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                # binary-addressed regfile: address plus single enable
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        # registered copy of req: high on the clock after the request
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.o_data)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        # registered copy of req, as for the INT request above
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.o_data)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        # registered copy of req, as for the INT request above
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.o_data)
            comb += d_xer.ack.eq(1)
1334
    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        * m       - Module that the FSM and its comb/sync statements are
                    added to
        * spr_dec - signal that receives (via sync) a copy of each newly
                    computed DEC value

        the four states (DEC_READ, DEC_WRITE, TB_READ, TB_WRITE) each move
        unconditionally on to the next, so DEC is decremented by one and
        TB is incremented by one once per trip round the four states.
        both go through the 'issue' read and write ports of the FAST
        regfile.

        returns m
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        # a single read port and a single write port are shared by DEC and TB
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.o_data - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.o_data + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.i_data.eq(new_tb)
                m.next = "DEC_READ"

        return m
1386
1387     def __iter__(self):
1388         yield from self.pc_i.ports()
1389         yield self.pc_o
1390         yield self.memerr_o
1391         yield from self.core.ports()
1392         yield from self.imem.ports()
1393         yield self.core_bigendian_i
1394         yield self.busy_o
1395
1396     def ports(self):
1397         return list(self)
1398
1399     def external_ports(self):
1400         ports = self.pc_i.ports()
1401         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1402                   ]
1403
1404         if self.jtag_en:
1405             ports += list(self.jtag.external_ports())
1406         else:
1407             # don't add DMI if JTAG is enabled
1408             ports += list(self.dbg.dmi.ports())
1409
1410         ports += list(self.imem.ibus.fields.values())
1411         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1412
1413         if self.sram4x4k:
1414             for sram in self.sram4k:
1415                 ports += list(sram.bus.fields.values())
1416
1417         if self.xics:
1418             ports += list(self.xics_icp.bus.fields.values())
1419             ports += list(self.xics_ics.bus.fields.values())
1420             ports.append(self.int_level_i)
1421
1422         if self.gpio:
1423             ports += list(self.simple_gpio.bus.fields.values())
1424             ports.append(self.gpio_o)
1425
1426         return ports
1427
1428     def ports(self):
1429         return list(self)
1430
1431
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that adds the clock plumbing

    holds the internal issuer (self.ti) and a DummyPLL (self.pll).
    elaborate() drives the "coresync" clock - and the debug clock, when
    the debug domain is not 'sync' - from the main clock.  when
    pspec.use_pll is set, the PLL is also added as a submodule, a
    "pllclk" clock domain is created, and the PLL test/VCO outputs and
    clock-select input are wired to signals on this object.
    """

    def __init__(self, pspec):
        """pspec is passed on to TestIssuerInternal; its optional use_pll
        attribute (treated as false when absent) enables the PLL wiring.
        """
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            # these only exist when the PLL is enabled
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """build and return the Module: submodules plus clock/reset wiring"""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain is clocked from the PLL's clock output
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL (currently disabled)
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # NOTE(review): no ClockDomain is added to m.domains here for
        # "coresync" or for the debug domain - presumably they are declared
        # inside the submodules; confirm before changing.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # ref_clk is the default-domain ClockSignal(), so "coresync" runs
        # from the main clock whether or not the PLL is enabled.
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """all ports of the internal issuer and the PLL, plus the
        default-domain clock and reset
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """the internal issuer's external ports plus the default-domain
        clock and reset and, when the PLL is enabled, the PLL-related
        signals.  returns a new list each call.
        """
        # copy, so that the list handed back by ti is never appended to
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1519
1520
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand the design to the nmigen command-line entry point
    main(dut, ports=dut.ports(), name="test_issuer")

    # run with no arguments at all: also write out RTLIL, this time
    # using only the external port list
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)