src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction Fetch FSM, wrapped up as a ControlBase pipeline stage.

    The class acts as its own Stage (it passes stage=self to ControlBase
    and provides setup/ispec/ospec): input data is a FetchInput (from which
    the PC is read), output data is a FetchOutput.  The FSM built by
    elaborate() has four states: IDLE, INSN_READ, INSN_READ2 (second word
    of an SVP64-prefixed instruction) and INSN_READY.

    NOTE(review): elaborate() reads self.svp64 and self.core_bigendian_i
    when svp64_en is set, but neither is assigned in __init__ here.
    presumably the creator of this object sets them - TODO confirm.
    """
    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """store references to the (externally-owned) objects the FSM uses

        * allow_overlap - python bool: if set, INSN_READ returns to IDLE
                          when dbg.stopping_o is raised
        * svp64_en      - python bool: enables SVP64 prefix detection
        * imem          - instruction fetch unit (a_pc_i, a_i_valid,
                          f_i_valid, f_busy_o, f_instr_o are used)
        * core_rst      - core reset: whilst held, nia is reset to zero
        * pdecode2      - decoder: receives the raw opcode (and SVP64 RM)
        * cur_state     - record into which pc, svstate and msr are latched
        * dbg           - debug interface (only stopping_o is used)
        * core          - the core: provides the state regfile (MSR read)
                          and, optionally, an icache (fetch_failed)
        * svstate       - SVSTATE value, latched alongside the PC
        * nia           - register: address of the following instruction
        * is_svp64_mode - register: set if the instruction was prefixed
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        # shorthand aliases for the input and output data
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up, all logic is in elaborate()"""
        pass

    def ispec(self):
        """Stage API: input data format (a fresh FetchInput)"""
        return FetchInput()

    def ospec(self):
        """Stage API: output data format (a fresh FetchOutput)"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        handshaking: self.p (i_valid/o_ready) accepts a PC to fetch from,
        self.n (o_valid/i_ready) signals that the instruction has been
        placed in the decoder's raw opcode input.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        # incoming handshake: a PC is being offered / we can take one
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        # outgoing handshake: instruction available / receiver took it
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # one-shot flag: cleared in IDLE when the MSR read is requested,
        # set again in INSN_READ once the MSR has been captured
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        # (without an icache there is nothing to fail: constant zero)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default is "no MSR read": only IDLE (below) raises the read-enable
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only accept a new PC if not being stopped by the debugger
                # and no fetch failure is being reported
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory to deliver the (first) 32-bit
            # word, capture the MSR, and check for an SVP64 prefix
            with m.State("INSN_READ"):
                # python-level (elaboration-time) choice: the debugger can
                # only abort the fetch here if overlap is allowed
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and (below)
                            # self.core_bigendian_i are not assigned by
                            # this class's __init__ - confirm the creator
                            # of this object sets them
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            # (prefixed instructions are 8 bytes, others 4)
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second half of an SVP64-prefixed instruction: wait for the
            # word at pc+4, which is the instruction to be decoded
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    # NOTE(review): pmode is assigned but not used anywhere
                    # in this method, and the string literal which follows
                    # is pseudo-code for the TODO above: as a bare
                    # expression it has no effect.
                    if self.svp64_en:
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                # (valid is held until the receiver indicates ready)
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
 353
 354
 355 class TestIssuerInternal(Elaboratable):
 356     """TestIssuer - reads instructions from TestMemory and issues them
 357
 358     efficiency and speed is not the main goal here: functional correctness
 359     and code clarity is.  optimisations (which almost 100% interfere with
 360     easy understanding) come later.
 361     """
 362
 363     def __init__(self, pspec):
 364
 365         # test is SVP64 is to be enabled
 366         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 367
 368         # and if regfiles are reduced
 369         self.regreduce_en = (hasattr(pspec, "regreduce") and
 370                              (pspec.regreduce == True))
 371
 372         # and if overlap requested
 373         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 374                               (pspec.allow_overlap == True))
 375
 376         # JTAG interface.  add this right at the start because if it's
 377         # added it *modifies* the pspec, by adding enable/disable signals
 378         # for parts of the rest of the core
 379         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 380         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 381         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 382         if self.jtag_en:
 383             # XXX MUST keep this up-to-date with litex, and
 384             # soc-cocotb-sim, and err.. all needs sorting out, argh
 385             subset = ['uart',
 386                       'mtwi',
 387                       'eint', 'gpio', 'mspi0',
 388                       # 'mspi1', - disabled for now
 389                       # 'pwm', 'sd0', - disabled for now
 390                       'sdr']
 391             self.jtag = JTAG(get_pinspecs(subset=subset),
 392                              domain=self.dbg_domain)
 393             # add signals to pspec to enable/disable icache and dcache
 394             # (or data and intstruction wishbone if icache/dcache not included)
 395             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 396             # TODO: do we actually care if these are not domain-synchronised?
 397             # honestly probably not.
 398             pspec.wb_icache_en = self.jtag.wb_icache_en
 399             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 400             self.wb_sram_en = self.jtag.wb_sram_en
 401         else:
 402             self.wb_sram_en = Const(1)
 403
 404         # add 4k sram blocks?
 405         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 406                          pspec.sram4x4kblock == True)
 407         if self.sram4x4k:
 408             self.sram4k = []
 409             for i in range(4):
 410                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 411                                                     # features={'err'}
 412                                                     ))
 413
 414         # add interrupt controller?
 415         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 416         if self.xics:
 417             self.xics_icp = XICS_ICP()
 418             self.xics_ics = XICS_ICS()
 419             self.int_level_i = self.xics_ics.int_level_i
 420
 421         # add GPIO peripheral?
 422         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 423         if self.gpio:
 424             self.simple_gpio = SimpleGPIO()
 425             self.gpio_o = self.simple_gpio.gpio_o
 426
 427         # main instruction core.  suitable for prototyping / demo only
 428         self.core = core = NonProductionCore(pspec)
 429         self.core_rst = ResetSignal("coresync")
 430
 431         # instruction decoder.  goes into Trap Record
 432         #pdecode = create_pdecode()
 433         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 434         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 435                                      opkls=IssuerDecode2ToOperand,
 436                                      svp64_en=self.svp64_en,
 437                                      regreduce_en=self.regreduce_en)
 438         pdecode = self.pdecode2.dec
 439
 440         if self.svp64_en:
 441             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 442
 443         # Test Instruction memory
 444         if hasattr(core, "icache"):
 445             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 446             # truly dreadful.  needs a huge reorg.
 447             pspec.icache = core.icache
 448         self.imem = ConfigFetchUnit(pspec).fu
 449
 450         # DMI interface
 451         self.dbg = CoreDebug()
 452
 453         # instruction go/monitor
 454         self.pc_o = Signal(64, reset_less=True)
 455         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 456         self.svstate_i = Data(64, "svstate_i")  # ditto
 457         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 458         self.busy_o = Signal(reset_less=True)
 459         self.memerr_o = Signal(reset_less=True)
 460
 461         # STATE regfile read /write ports for PC, MSR, SVSTATE
 462         staterf = self.core.regs.rf['state']
 463         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 464         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 465         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 466         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 467
 468         # DMI interface access
 469         intrf = self.core.regs.rf['int']
 470         crrf = self.core.regs.rf['cr']
 471         xerrf = self.core.regs.rf['xer']
 472         self.int_r = intrf.r_ports['dmi']  # INT read
 473         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 474         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 475
 476         if self.svp64_en:
 477             # for predication
 478             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 479             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 480
 481         # hack method of keeping an eye on whether branch/trap set the PC
 482         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 483         self.state_nia.wen.name = 'state_nia_wen'
 484
 485         # pulse to synchronize the simulator at instruction end
 486         self.insn_done = Signal()
 487
 488         # indicate any instruction still outstanding, in execution
 489         self.any_busy = Signal()
 490
 491         if self.svp64_en:
 492             # store copies of predicate masks
 493             self.srcmask = Signal(64)
 494             self.dstmask = Signal(64)
 495
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        arguments:

        * m - the Module to which the FSM is added
        * pred_insn_i_valid / pred_insn_o_ready - incoming handshake.
          o_ready is raised whilst in FETCH_PRED_IDLE, and the fetch
          starts when i_valid is seen there
        * pred_mask_o_valid / pred_mask_i_ready - outgoing handshake.
          o_valid is raised in FETCH_PRED_DONE, and the FSM returns to
          idle when i_ready is seen there

        results are left in self.srcmask and self.dstmask.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (self.int_pred, self.cr_pred, self.srcmask and self.dstmask are
        only created when svp64 is enabled)
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are
        #  decoded: predmode then decides which pair actually gets used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            # wait for a request, then branch on the predicate mode
            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (start from zero: CR_READ ORs bits in, one by one)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: both masks are
                        # all ones, and no regfile read is needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            # the read requested on the previous cycle is on int_pred.o_data
            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the bottom 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            # as INT_DST_READ, but for the source mask
            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is also used by the Else branch and by the
                    #  If which follows: it is a python variable, so its
                    #  scope is not limited to this "with" block)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # process the CR field requested on the previous cycle
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            # INT and CR paths both end up here, with the complete masks
            # in new_srcmask and new_dstmask
            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            # masks are available: hold valid until the receiver is ready
            with m.State("FETCH_PRED_DONE"):
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 664
    def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states, in the order they are normally visited:

        * ISSUE_START  - ask Fetch to start (or sit here whilst stopped)
        * INSN_WAIT    - wait for Fetch to deliver the instruction
        * PRED_START   - ask the predicate FSM to start (svp64_en only)
        * MASK_WAIT    - wait for the predicate masks (svp64_en only)
        * PRED_SKIP    - advance srcstep/dststep using the masks
        * DECODE_SV    - latch the decoded instruction into core.i
        * INSN_EXECUTE - hand over to the execute FSM
        * EXECUTE_WAIT - wait for completion, then update PC / SVSTATE

        arguments:

        * m: the Module being built. statements are added to its comb
          and sync domains, and the FSM is created in it.
        * core: the core.  its input record (core.i) is loaded in
          DECODE_SV and its exception records are read in EXECUTE_WAIT.
        * pc_changed, sv_changed: flags that are set (sync) here when
          the PC / SVSTATE is overwritten from pc_i / svstate_i whilst
          stopped, and tested in EXECUTE_WAIT to skip the PC / SVSTATE
          update.
        * nia: value written to the PC State register when moving on
          to the next instruction.
        * dbg: debug interface: core_stop_o and stopping_o are read,
          core_stopped_i is driven.
        * core_rst: core reset. handled identically to dbg.core_stop_o
          (the FSM takes its "stopped" branch whilst either is set).
        * is_svp64_mode: set when the current instruction is SVP64
          (used for the VL==0 check and to bypass PRED_SKIP).
        * fetch_pc_*, fetch_insn_*, pred_insn_*, pred_mask_*,
          exec_insn_*, exec_pc_*: ready/valid handshake pairs with the
          fetch, predicate-fetch and execute FSMs.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # for updating svstate (things like srcstep etc.)
        # new_svstate defaults to the current SVSTATE: states further down
        # only override the fields they want to change, then set
        # update_svstate, which triggers the write at the very end
        update_svstate = Signal()  # set this (below) if updating
        new_svstate = SVSTATERec("new_svstate")
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        # (only a core with an icache can report this: otherwise constant 0)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)
        # set to fault in decoder
        # update (highest priority) instruction fault
        comb += pdecode2.instr_fault.eq(fetch_failed)

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # "stopping" is only looked at when overlap is allowed:
                # otherwise it is tied to zero and the Else branch is
                # always the one taken
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
                with m.If(pred_mask_o_valid):  # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        # (stays at its default when pred_src_zero is set)
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # note: cur_vl is the python name that was bound in
                        # the INSN_WAIT state above (cur_state.svstate.vl)
                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (everything is latched with sync, so core.i holds the
                #  decoded instruction from the next clock onwards)
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        with m.If(fetch_failed):
                            # instruction fetch: exception is from MMU
                            sync += pdecode2.ldst_exc.eq(mmu)
                        with m.Else():
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # the four branches below are one If/Elif chain:
                        # the first matching condition wins

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if either PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either PC or SVSTATE
                        with m.Elif(pc_changed | sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # (with svp64 enabled only when loop_continue
                            #  is set, otherwise unconditionally)
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): same handling as ISSUE_START
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

        # check if svstate needs updating: if so, write it to State Regfile
        # (and keep the local copy, cur_state.svstate, in step with it)
        with m.If(update_svstate):
            comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(new_svstate)
            sync += cur_state.svstate.eq(new_svstate)  # for next clock
 970
 971     def execute_fsm(self, m, core, pc_changed, sv_changed,
 972                     exec_insn_i_valid, exec_insn_o_ready,
 973                     exec_pc_o_valid, exec_pc_i_ready):
 974         """execute FSM
 975
 976         execute FSM. this interacts with the "issue" FSM
 977         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 978         (outgoing). SVP64 RM prefixes have already been set up by the
 979         "issue" phase, so execute is fairly straightforward.
 980         """
 981
 982         comb = m.d.comb
 983         sync = m.d.sync
 984         pdecode2 = self.pdecode2
 985
 986         # temporaries
 987         core_busy_o = core.n.o_data.busy_o  # core is busy
 988         core_ivalid_i = core.p.i_valid              # instruction is valid
 989
 990         with m.FSM(name="exec_fsm"):
 991
 992             # waiting for instruction bus (stays there until not busy)
 993             with m.State("INSN_START"):
 994                 comb += exec_insn_o_ready.eq(1)
 995                 with m.If(exec_insn_i_valid):
 996                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 997                     sync += sv_changed.eq(0)
 998                     sync += pc_changed.eq(0)
 999                     with m.If(core.p.o_ready):  # only move if accepted
1000                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1001
1002             # instruction started: must wait till it finishes
1003             with m.State("INSN_ACTIVE"):
1004                 # note changes to PC and SVSTATE
1005                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1006                     sync += sv_changed.eq(1)
1007                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1008                     sync += pc_changed.eq(1)
1009                 with m.If(~core_busy_o):  # instruction done!
1010                     comb += exec_pc_o_valid.eq(1)
1011                     with m.If(exec_pc_i_ready):
1012                         # when finished, indicate "done".
1013                         # however, if there was an exception, the instruction
1014                         # is *not* yet done.  this is an implementation
1015                         # detail: we choose to implement exceptions by
1016                         # taking the exception information from the LDST
1017                         # unit, putting that *back* into the PowerDecoder2,
1018                         # and *re-running the entire instruction*.
1019                         # if we erroneously indicate "done" here, it is as if
1020                         # there were *TWO* instructions:
1021                         # 1) the failed LDST 2) a TRAP.
1022                         with m.If(~pdecode2.ldst_exc.happened):
1023                             comb += self.insn_done.eq(1)
1024                         m.next = "INSN_START"  # back to fetch
1025
1026     def setup_peripherals(self, m):
1027         comb, sync = m.d.comb, m.d.sync
1028
1029         # okaaaay so the debug module must be in coresync clock domain
1030         # but NOT its reset signal. to cope with this, set every single
1031         # submodule explicitly in coresync domain, debug and JTAG
1032         # in their own one but using *external* reset.
1033         csd = DomainRenamer("coresync")
1034         dbd = DomainRenamer(self.dbg_domain)
1035
1036         m.submodules.core = core = csd(self.core)
1037         # this _so_ needs sorting out.  ICache is added down inside
1038         # LoadStore1 and is already a submodule of LoadStore1
1039         if not isinstance(self.imem, ICache):
1040             m.submodules.imem = imem = csd(self.imem)
1041         m.submodules.dbg = dbg = dbd(self.dbg)
1042         if self.jtag_en:
1043             m.submodules.jtag = jtag = dbd(self.jtag)
1044             # TODO: UART2GDB mux, here, from external pin
1045             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1046             sync += dbg.dmi.connect_to(jtag.dmi)
1047
1048         cur_state = self.cur_state
1049
1050         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1051         if self.sram4x4k:
1052             for i, sram in enumerate(self.sram4k):
1053                 m.submodules["sram4k_%d" % i] = csd(sram)
1054                 comb += sram.enable.eq(self.wb_sram_en)
1055
1056         # XICS interrupt handler
1057         if self.xics:
1058             m.submodules.xics_icp = icp = csd(self.xics_icp)
1059             m.submodules.xics_ics = ics = csd(self.xics_ics)
1060             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1061             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1062
1063         # GPIO test peripheral
1064         if self.gpio:
1065             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1066
1067         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1068         # XXX causes litex ECP5 test to get wrong idea about input and output
1069         # (but works with verilator sim *sigh*)
1070         # if self.gpio and self.xics:
1071         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1072
1073         # instruction decoder
1074         pdecode = create_pdecode()
1075         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1076         if self.svp64_en:
1077             m.submodules.svp64 = svp64 = csd(self.svp64)
1078
1079         # convenience
1080         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1081         intrf = self.core.regs.rf['int']
1082
1083         # clock delay power-on reset
1084         cd_por = ClockDomain(reset_less=True)
1085         cd_sync = ClockDomain()
1086         core_sync = ClockDomain("coresync")
1087         m.domains += cd_por, cd_sync, core_sync
1088         if self.dbg_domain != "sync":
1089             dbg_sync = ClockDomain(self.dbg_domain)
1090             m.domains += dbg_sync
1091
1092         ti_rst = Signal(reset_less=True)
1093         delay = Signal(range(4), reset=3)
1094         with m.If(delay != 0):
1095             m.d.por += delay.eq(delay - 1)
1096         comb += cd_por.clk.eq(ClockSignal())
1097
1098         # power-on reset delay
1099         core_rst = ResetSignal("coresync")
1100         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1101         comb += core_rst.eq(ti_rst)
1102
1103         # debug clock is same as coresync, but reset is *main external*
1104         if self.dbg_domain != "sync":
1105             dbg_rst = ResetSignal(self.dbg_domain)
1106             comb += dbg_rst.eq(ResetSignal())
1107
1108         # busy/halted signals from core
1109         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1110         comb += self.busy_o.eq(core_busy_o)
1111         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1112
1113         # temporary hack: says "go" immediately for both address gen and ST
1114         l0 = core.l0
1115         ldst = core.fus.fus['ldst0']
1116         st_go_edge = rising_edge(m, ldst.st.rel_o)
1117         # link addr-go direct to rel
1118         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1119         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1120
    def elaborate(self, platform):
        """build the issuer: peripherals, state read-out and the FSMs

        sets up the core and peripherals (setup_peripherals), wires up
        the PC / SVSTATE read-out and the debug state, creates the
        ready/valid handshake Signals and then instantiates, in this
        order: the Fetch FSM, the Issue FSM, the Predicate-fetch FSM
        (only when svp64_en is set), the Execute FSM, the DMI regfile
        read-out and the DEC/TB FSM.

        platform is not used here.  returns the completed Module.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal()  # note write to PC
        sv_changed = Signal()  # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are the defaults: the issue FSM, set up further down,
        #  drives wen / i_data itself when it needs to write the PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        # (unlike the other FSMs, which are methods adding logic directly
        #  to this module, Fetch is a separate submodule)
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(pc)
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate FSM only exists when SVP64 is enabled: without it
        # the issue FSM goes from INSN_WAIT directly to DECODE_SV
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1251
1252     def do_dmi(self, m, dbg):
1253         """deals with DMI debug requests
1254
1255         currently only provides read requests for the INT regfile, CR and XER
1256         it will later also deal with *writing* to these regfiles.
1257         """
1258         comb = m.d.comb
1259         sync = m.d.sync
1260         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1261         intrf = self.core.regs.rf['int']
1262
1263         with m.If(d_reg.req):  # request for regfile access being made
1264             # TODO: error-check this
1265             # XXX should this be combinatorial?  sync better?
1266             if intrf.unary:
1267                 comb += self.int_r.ren.eq(1 << d_reg.addr)
1268             else:
1269                 comb += self.int_r.addr.eq(d_reg.addr)
1270                 comb += self.int_r.ren.eq(1)
1271         d_reg_delay = Signal()
1272         sync += d_reg_delay.eq(d_reg.req)
1273         with m.If(d_reg_delay):
1274             # data arrives one clock later
1275             comb += d_reg.data.eq(self.int_r.o_data)
1276             comb += d_reg.ack.eq(1)
1277
1278         # sigh same thing for CR debug
1279         with m.If(d_cr.req):  # request for regfile access being made
1280             comb += self.cr_r.ren.eq(0b11111111)  # enable all
1281         d_cr_delay = Signal()
1282         sync += d_cr_delay.eq(d_cr.req)
1283         with m.If(d_cr_delay):
1284             # data arrives one clock later
1285             comb += d_cr.data.eq(self.cr_r.o_data)
1286             comb += d_cr.ack.eq(1)
1287
1288         # aaand XER...
1289         with m.If(d_xer.req):  # request for regfile access being made
1290             comb += self.xer_r.ren.eq(0b111111)  # enable all
1291         d_xer_delay = Signal()
1292         sync += d_xer_delay.eq(d_xer.req)
1293         with m.If(d_xer_delay):
1294             # data arrives one clock later
1295             comb += d_xer.data.eq(self.xer_r.o_data)
1296             comb += d_xer.ack.eq(1)
1297
1298     def tb_dec_fsm(self, m, spr_dec):
1299         """tb_dec_fsm
1300
1301         this is a FSM for updating either dec or tb.  it runs alternately
1302         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1303         value to DEC, however the regfile has "passthrough" on it so this
1304         *should* be ok.
1305
1306         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1307         """
1308
1309         comb, sync = m.d.comb, m.d.sync
1310         fast_rf = self.core.regs.rf['fast']
1311         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
1312         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
1313
1314         with m.FSM() as fsm:
1315
1316             # initiates read of current DEC
1317             with m.State("DEC_READ"):
1318                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1319                 comb += fast_r_dectb.ren.eq(1)
1320                 m.next = "DEC_WRITE"
1321
1322             # waits for DEC read to arrive (1 cycle), updates with new value
1323             with m.State("DEC_WRITE"):
1324                 new_dec = Signal(64)
1325                 # TODO: MSR.LPCR 32-bit decrement mode
1326                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1327                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1328                 comb += fast_w_dectb.wen.eq(1)
1329                 comb += fast_w_dectb.i_data.eq(new_dec)
1330                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
1331                 m.next = "TB_READ"
1332
1333             # initiates read of current TB
1334             with m.State("TB_READ"):
1335                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1336                 comb += fast_r_dectb.ren.eq(1)
1337                 m.next = "TB_WRITE"
1338
1339             # waits for read TB to arrive, initiates write of current TB
1340             with m.State("TB_WRITE"):
1341                 new_tb = Signal(64)
1342                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1343                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1344                 comb += fast_w_dectb.wen.eq(1)
1345                 comb += fast_w_dectb.i_data.eq(new_tb)
1346                 m.next = "DEC_READ"
1347
1348         return m
1349
1350     def __iter__(self):
1351         yield from self.pc_i.ports()
1352         yield self.pc_o
1353         yield self.memerr_o
1354         yield from self.core.ports()
1355         yield from self.imem.ports()
1356         yield self.core_bigendian_i
1357         yield self.busy_o
1358
1359     def ports(self):
1360         return list(self)
1361
1362     def external_ports(self):
1363         ports = self.pc_i.ports()
1364         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1365                   ]
1366
1367         if self.jtag_en:
1368             ports += list(self.jtag.external_ports())
1369         else:
1370             # don't add DMI if JTAG is enabled
1371             ports += list(self.dbg.dmi.ports())
1372
1373         ports += list(self.imem.ibus.fields.values())
1374         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1375
1376         if self.sram4x4k:
1377             for sram in self.sram4k:
1378                 ports += list(sram.bus.fields.values())
1379
1380         if self.xics:
1381             ports += list(self.xics_icp.bus.fields.values())
1382             ports += list(self.xics_ics.bus.fields.values())
1383             ports.append(self.int_level_i)
1384
1385         if self.gpio:
1386             ports += list(self.simple_gpio.bus.fields.values())
1387             ports.append(self.gpio_o)
1388
1389         return ports
1390
1391     def ports(self):
1392         return list(self)
1393
1394
class TestIssuer(Elaboratable):
    """TestIssuerInternal plus the clock (and optional PLL) wiring around it

    pspec -- parameter specification, passed on to TestIssuerInternal.
             if it has a true "use_pll" attribute, the PLL is added to
             the design and its signals are exported.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (no use_pll attribute means "no PLL")
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """builds the module: adds the issuer, the PLL when enabled, and
        drives the coresync (and, if separate, debug) clock
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain from PLL, clocked by the PLL output
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # the PLL reference input (clk_24_i) is not driven here: it is
            # exported by external_ports() instead.
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # only the *clock signals* of the coresync and debug domains are
        # driven here: the domains themselves are not declared in this
        # module (presumably TestIssuerInternal does that - TODO confirm)
        intclk = ClockSignal("coresync")
        # XXX BYPASS PLL XXX
        # with the PLL enabled the core still runs from ref_clk, not from
        # the PLL output
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            comb += ClockSignal(self.ti.dbg_domain).eq(intclk)

        return m

    def ports(self):
        """returns the issuer's ports, the PLL's ports, and the default
        domain's clock and reset
        """
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        """returns the top-level port list: the issuer's external ports,
        the default domain's clock and reset and, with the PLL enabled,
        the PLL related signals
        """
        # copy, so that the list returned by the issuer is not modified
        ports = list(self.ti.external_ports())
        ports += [ClockSignal(), ResetSignal()]
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1482
1483
if __name__ == '__main__':
    # one Function Unit of each kind
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec_args = {'ldst_ifacetype': 'bare_wb',
                  'imem_ifacetype': 'bare_wb',
                  'addr_wid': 48,
                  'mask_wid': 8,
                  'reg_wid': 64,
                  'units': units}
    pspec = TestMemPspec(**pspec_args)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end (uses sys.argv)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run without any arguments: also write out the design (with its
    # external ports only) as RTLIL
    no_cli_args = len(sys.argv) == 1
    if no_cli_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)