src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
 167 class FetchFSM(ControlBase):
 168     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 169                  pdecode2, cur_state,
 170                  dbg, core, svstate, nia, is_svp64_mode):
 171         self.allow_overlap = allow_overlap
 172         self.svp64_en = svp64_en
 173         self.imem = imem
 174         self.core_rst = core_rst
 175         self.pdecode2 = pdecode2
 176         self.cur_state = cur_state
 177         self.dbg = dbg
 178         self.core = core
 179         self.svstate = svstate
 180         self.nia = nia
 181         self.is_svp64_mode = is_svp64_mode
 182
 183         # set up pipeline ControlBase and allocate i/o specs
 184         # (unusual: normally done by the Pipeline API)
 185         super().__init__(stage=self)
 186         self.p.i_data, self.n.o_data = self.new_specs(None)
 187         self.i, self.o = self.p.i_data, self.n.o_data
 188
 189     # next 3 functions are Stage API Compliance
 190     def setup(self, m, i):
 191         pass
 192
 193     def ispec(self):
 194         return FetchInput()
 195
 196     def ospec(self):
 197         return FetchOutput()
 198
 199     def elaborate(self, platform):
 200         """fetch FSM
 201
 202         this FSM performs fetch of raw instruction data, partial-decodes
 203         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 204         read a 2nd 32-bit quantity if that occurs.
 205         """
 206         m = super().elaborate(platform)
 207
 208         dbg = self.dbg
 209         core = self.core
 210         pc = self.i.pc
 211         svstate = self.svstate
 212         nia = self.nia
 213         is_svp64_mode = self.is_svp64_mode
 214         fetch_pc_o_ready = self.p.o_ready
 215         fetch_pc_i_valid = self.p.i_valid
 216         fetch_insn_o_valid = self.n.o_valid
 217         fetch_insn_i_ready = self.n.i_ready
 218
 219         comb = m.d.comb
 220         sync = m.d.sync
 221         pdecode2 = self.pdecode2
 222         cur_state = self.cur_state
 223         dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode
 224
 225         msr_read = Signal(reset=1)
 226
 227         # also note instruction fetch failed
 228         if hasattr(core, "icache"):
 229             fetch_failed = core.icache.i_out.fetch_failed
 230         else:
 231             fetch_failed = Const(0, 1)
 232
 233         # don't read msr every cycle
 234         staterf = self.core.regs.rf['state']
 235         state_r_msr = staterf.r_ports['msr']  # MSR rd
 236
 237         comb += state_r_msr.ren.eq(0)
 238
 239         with m.FSM(name='fetch_fsm'):
 240
 241             # waiting (zzz)
 242             with m.State("IDLE"):
 243                 with m.If(~dbg.stopping_o & ~fetch_failed):
 244                     comb += fetch_pc_o_ready.eq(1)
 245                 with m.If(fetch_pc_i_valid & ~fetch_failed):
 246                     # instruction allowed to go: start by reading the PC
 247                     # capture the PC and also drop it into Insn Memory
 248                     # we have joined a pair of combinatorial memory
 249                     # lookups together.  this is Generally Bad.
 250                     comb += self.imem.a_pc_i.eq(pc)
 251                     comb += self.imem.a_i_valid.eq(1)
 252                     comb += self.imem.f_i_valid.eq(1)
 253                     sync += cur_state.pc.eq(pc)
 254                     sync += cur_state.svstate.eq(svstate)  # and svstate
 255
 256                     # initiate read of MSR. arrives one clock later
 257                     comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
 258                     sync += msr_read.eq(0)
 259
 260                     m.next = "INSN_READ"  # move to "wait for bus" phase
 261
 262             # dummy pause to find out why simulation is not keeping up
 263             with m.State("INSN_READ"):
 264                 if self.allow_overlap:
 265                     stopping = dbg.stopping_o
 266                 else:
 267                     stopping = Const(0)
 268                 with m.If(stopping):
 269                     # stopping: jump back to idle
 270                     m.next = "IDLE"
 271                 with m.Else():
 272                     # one cycle later, msr/sv read arrives.  valid only once.
 273                     with m.If(~msr_read):
 274                         sync += msr_read.eq(1)  # yeah don't read it again
 275                         sync += cur_state.msr.eq(state_r_msr.o_data)
 276                     with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
 277                         # busy but not fetch failed: stay in wait-read
 278                         comb += self.imem.a_i_valid.eq(1)
 279                         comb += self.imem.f_i_valid.eq(1)
 280                     with m.Else():
 281                         # not busy (or fetch failed!): instruction fetched
 282                         # when fetch failed, the instruction gets ignored
 283                         # by the decoder
 284                         insn = ~get_insn(self.imem.f_instr_o, cur_state.pc)
 285                         if self.svp64_en:
 286                             svp64 = self.svp64
 287                             # decode the SVP64 prefix, if any
 288                             comb += svp64.raw_opcode_in.eq(insn)
 289                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 290                             # pass the decoded prefix (if any) to PowerDecoder2
 291                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 292                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 293                             # remember whether this is a prefixed instruction,
 294                             # so the FSM can readily loop when VL==0
 295                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 296                             # calculate the address of the following instruction
 297                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 298                             sync += nia.eq(cur_state.pc + insn_size)
 299                             with m.If(~svp64.is_svp64_mode):
 300                                 # with no prefix, store the instruction
 301                                 # and hand it directly to the next FSM
 302                                 sync += dec_opcode_o.eq(insn)
 303                                 m.next = "INSN_READY"
 304                             with m.Else():
 305                                 # fetch the rest of the instruction from memory
 306                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 307                                 comb += self.imem.a_i_valid.eq(1)
 308                                 comb += self.imem.f_i_valid.eq(1)
 309                                 m.next = "INSN_READ2"
 310                         else:
 311                             # not SVP64 - 32-bit only
 312                             sync += nia.eq(cur_state.pc + 4)
 313                             sync += dec_opcode_o.eq(insn)
 314                             m.next = "INSN_READY"
 315
 316             with m.State("INSN_READ2"):
 317                 with m.If(self.imem.f_busy_o):  # zzz...
 318                     # busy: stay in wait-read
 319                     comb += self.imem.a_i_valid.eq(1)
 320                     comb += self.imem.f_i_valid.eq(1)
 321                 with m.Else():
 322                     # not busy: instruction fetched
 323                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 324                     sync += dec_opcode_o.eq(insn)
 325                     m.next = "INSN_READY"
 326                     # TODO: probably can start looking at pdecode2.rm_dec
 327                     # here or maybe even in INSN_READ state, if svp64_mode
 328                     # detected, in order to trigger - and wait for - the
 329                     # predicate reading.
 330                     if self.svp64_en:
 331                         pmode = pdecode2.rm_dec.predmode
 332                     """
 333                     if pmode != SVP64PredMode.ALWAYS.value:
 334                         fire predicate loading FSM and wait before
 335                         moving to INSN_READY
 336                     else:
 337                         sync += self.srcmask.eq(-1) # set to all 1s
 338                         sync += self.dstmask.eq(-1) # set to all 1s
 339                         m.next = "INSN_READY"
 340                     """
 341
 342             with m.State("INSN_READY"):
 343                 # hand over the instruction, to be decoded
 344                 comb += fetch_insn_o_valid.eq(1)
 345                 with m.If(fetch_insn_i_ready):
 346                     m.next = "IDLE"
 347
 348         # whatever was done above, over-ride it if core reset is held
 349         with m.If(self.core_rst):
 350             sync += nia.eq(0)
 351
 352         return m
 353
 354
 355 class TestIssuerInternal(Elaboratable):
 356     """TestIssuer - reads instructions from TestMemory and issues them
 357
 358     efficiency and speed is not the main goal here: functional correctness
 359     and code clarity is.  optimisations (which almost 100% interfere with
 360     easy understanding) come later.
 361     """
 362
 363     def __init__(self, pspec):
 364
 365         # test is SVP64 is to be enabled
 366         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 367
 368         # and if regfiles are reduced
 369         self.regreduce_en = (hasattr(pspec, "regreduce") and
 370                              (pspec.regreduce == True))
 371
 372         # and if overlap requested
 373         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 374                               (pspec.allow_overlap == True))
 375
 376         # JTAG interface.  add this right at the start because if it's
 377         # added it *modifies* the pspec, by adding enable/disable signals
 378         # for parts of the rest of the core
 379         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 380         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 381         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 382         if self.jtag_en:
 383             # XXX MUST keep this up-to-date with litex, and
 384             # soc-cocotb-sim, and err.. all needs sorting out, argh
 385             subset = ['uart',
 386                       'mtwi',
 387                       'eint', 'gpio', 'mspi0',
 388                       # 'mspi1', - disabled for now
 389                       # 'pwm', 'sd0', - disabled for now
 390                       'sdr']
 391             self.jtag = JTAG(get_pinspecs(subset=subset),
 392                              domain=self.dbg_domain)
 393             # add signals to pspec to enable/disable icache and dcache
 394             # (or data and intstruction wishbone if icache/dcache not included)
 395             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 396             # TODO: do we actually care if these are not domain-synchronised?
 397             # honestly probably not.
 398             pspec.wb_icache_en = self.jtag.wb_icache_en
 399             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 400             self.wb_sram_en = self.jtag.wb_sram_en
 401         else:
 402             self.wb_sram_en = Const(1)
 403
 404         # add 4k sram blocks?
 405         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 406                          pspec.sram4x4kblock == True)
 407         if self.sram4x4k:
 408             self.sram4k = []
 409             for i in range(4):
 410                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 411                                                     # features={'err'}
 412                                                     ))
 413
 414         # add interrupt controller?
 415         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 416         if self.xics:
 417             self.xics_icp = XICS_ICP()
 418             self.xics_ics = XICS_ICS()
 419             self.int_level_i = self.xics_ics.int_level_i
 420
 421         # add GPIO peripheral?
 422         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 423         if self.gpio:
 424             self.simple_gpio = SimpleGPIO()
 425             self.gpio_o = self.simple_gpio.gpio_o
 426
 427         # main instruction core.  suitable for prototyping / demo only
 428         self.core = core = NonProductionCore(pspec)
 429         self.core_rst = ResetSignal("coresync")
 430
 431         # instruction decoder.  goes into Trap Record
 432         #pdecode = create_pdecode()
 433         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 434         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 435                                      opkls=IssuerDecode2ToOperand,
 436                                      svp64_en=self.svp64_en,
 437                                      regreduce_en=self.regreduce_en)
 438         pdecode = self.pdecode2.dec
 439
 440         if self.svp64_en:
 441             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 442
 443         # Test Instruction memory
 444         if hasattr(core, "icache"):
 445             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 446             # truly dreadful.  needs a huge reorg.
 447             pspec.icache = core.icache
 448         self.imem = ConfigFetchUnit(pspec).fu
 449
 450         # DMI interface
 451         self.dbg = CoreDebug()
 452
 453         # instruction go/monitor
 454         self.pc_o = Signal(64, reset_less=True)
 455         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 456         self.svstate_i = Data(64, "svstate_i")  # ditto
 457         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 458         self.busy_o = Signal(reset_less=True)
 459         self.memerr_o = Signal(reset_less=True)
 460
 461         # STATE regfile read /write ports for PC, MSR, SVSTATE
 462         staterf = self.core.regs.rf['state']
 463         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 464         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 465         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 466         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 467
 468         # DMI interface access
 469         intrf = self.core.regs.rf['int']
 470         crrf = self.core.regs.rf['cr']
 471         xerrf = self.core.regs.rf['xer']
 472         self.int_r = intrf.r_ports['dmi']  # INT read
 473         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 474         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 475
 476         if self.svp64_en:
 477             # for predication
 478             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 479             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 480
 481         # hack method of keeping an eye on whether branch/trap set the PC
 482         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 483         self.state_nia.wen.name = 'state_nia_wen'
 484
 485         # pulse to synchronize the simulator at instruction end
 486         self.insn_done = Signal()
 487
 488         # indicate any instruction still outstanding, in execution
 489         self.any_busy = Signal()
 490
 491         if self.svp64_en:
 492             # store copies of predicate masks
 493             self.srcmask = Signal(64)
 494             self.dstmask = Signal(64)
 495
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred (IntRegs
        r_ports['pred']) or self.cr_pred (CRRegs r_ports['cr_pred']).
        in the case of CRs it has to be done through multiple reads,
        extracting one relevant bit at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        handshake signals:

        * pred_insn_i_valid: (read) request to start fetching the masks
        * pred_insn_o_ready: (driven) set while idle, i.e. able to start
        * pred_mask_o_valid: (driven) set once srcmask/dstmask are stored
        * pred_mask_i_ready: (read) acknowledges the masks: back to idle

        results are left in self.srcmask and self.dstmask.  nothing is
        returned.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation are decoded, for source
        # and for destination: predmode selects which one is used below)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (start from empty masks: CR_READ ORs bits in)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: masks are all 1s
                        # and are stored directly (no shifting step needed)
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                # (the read was requested in the previous state)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                # (the read was requested in the previous state)
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is also referenced in the Else branch and
                    # after it: the python name stays bound once created)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are stored: hold "valid" until they are accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 664
    def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states, in the order they are declared below:

        * ISSUE_START  - asks Fetch to start (unless stopped or in reset,
                         in which case pc_i / svstate_i overrides are
                         accepted instead)
        * INSN_WAIT    - waits for Fetch to hand over the instruction
        * PRED_START   - asks the predicate-fetch FSM to start
        * MASK_WAIT    - waits for the predicate masks
        * PRED_SKIP    - advances srcstep/dststep past zero mask bits
        * DECODE_SV    - latches the decoded instruction into core.i
        * INSN_EXECUTE - hands the instruction to the execute FSM
        * EXECUTE_WAIT - waits for completion, then decides where to go

        handshake signals driven (combinatorially) by this FSM:
        fetch_pc_i_valid, fetch_insn_i_ready, pred_insn_i_valid,
        pred_mask_i_ready, exec_insn_i_valid, exec_pc_i_ready.
        their counterparts (fetch_pc_o_ready, fetch_insn_o_valid,
        pred_insn_o_ready, pred_mask_o_valid, exec_insn_o_ready,
        exec_pc_o_valid) are only read.

        other arguments:

        * m             - Module that the FSM statements are added to
        * core          - the core: core.i is loaded in PRED_SKIP/DECODE_SV
        * pc_changed    - read in EXECUTE_WAIT; set here when pc_i.ok
                          arrives while stopped
        * sv_changed    - as pc_changed, but for SVSTATE / svstate_i
        * nia           - value written to the PC State Register when
                          moving on to the next instruction
        * dbg           - debug module (core_stop_o, core_stopped_i and,
                          if allow_overlap is set, stopping_o)
        * core_rst      - core reset: treated exactly like "stop"
        * is_svp64_mode - instruction being issued has an SVP64 prefix
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # for updating svstate (things like srcstep etc.)
        update_svstate = Signal()  # set this (below) if updating
        new_svstate = SVSTATERec("new_svstate")
        # default: new_svstate mirrors the current SVSTATE.  the states
        # below override individual fields (or the whole record) with
        # later comb assignments, before raising update_svstate.
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        # (only available when the core has an icache attribute: otherwise
        # a constant zero is used, i.e. fetch never reports failure)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)
        # set to fault in decoder
        # update (highest priority) instruction fault
        comb += pdecode2.instr_fault.eq(fetch_failed)

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        # replaces the whole record (overrides the default
                        # assignment made at the top of this function)
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # "stopping" is only honoured here when allow_overlap is
                # set: otherwise it is a constant zero and the Else branch
                # below is always taken
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # (note: this Python name is also used further down,
                        # in the PRED_SKIP state)
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
                with m.If(pred_mask_o_valid):  # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        # (only driven when not in zero mode: otherwise it
                        # stays at its reset value - presumably 0, TODO
                        # confirm against the SVSTATE record definition)
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        # (cur_vl is the Python name bound in INSN_WAIT above)
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (everything is latched with sync, so core.i holds the
                # decoded instruction from the next clock onwards)
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # the four branches below are mutually exclusive
                        # (If / Elif / Elif / Else), in priority order

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if either PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either PC or SVSTATE
                        with m.Elif(pc_changed | sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # (with SVP64 enabled: only when loop_continue is
                            # set.  without SVP64: unconditionally)
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): same override handling as in
                    # the ISSUE_START state
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

        # check if svstate needs updating: if so, write it to State Regfile
        # (and keep the local copy in cur_state in step with the regfile)
        with m.If(update_svstate):
            comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(new_svstate)
            sync += cur_state.svstate.eq(new_svstate)  # for next clock
 963
 964     def execute_fsm(self, m, core, pc_changed, sv_changed,
 965                     exec_insn_i_valid, exec_insn_o_ready,
 966                     exec_pc_o_valid, exec_pc_i_ready):
 967         """execute FSM
 968
 969         execute FSM. this interacts with the "issue" FSM
 970         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 971         (outgoing). SVP64 RM prefixes have already been set up by the
 972         "issue" phase, so execute is fairly straightforward.
 973         """
 974
 975         comb = m.d.comb
 976         sync = m.d.sync
 977         pdecode2 = self.pdecode2
 978
 979         # temporaries
 980         core_busy_o = core.n.o_data.busy_o  # core is busy
 981         core_ivalid_i = core.p.i_valid              # instruction is valid
 982
 983         with m.FSM(name="exec_fsm"):
 984
 985             # waiting for instruction bus (stays there until not busy)
 986             with m.State("INSN_START"):
 987                 comb += exec_insn_o_ready.eq(1)
 988                 with m.If(exec_insn_i_valid):
 989                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 990                     sync += sv_changed.eq(0)
 991                     sync += pc_changed.eq(0)
 992                     with m.If(core.p.o_ready):  # only move if accepted
 993                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 994
 995             # instruction started: must wait till it finishes
 996             with m.State("INSN_ACTIVE"):
 997                 # note changes to PC and SVSTATE
 998                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
 999                     sync += sv_changed.eq(1)
1000                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1001                     sync += pc_changed.eq(1)
1002                 with m.If(~core_busy_o):  # instruction done!
1003                     comb += exec_pc_o_valid.eq(1)
1004                     with m.If(exec_pc_i_ready):
1005                         # when finished, indicate "done".
1006                         # however, if there was an exception, the instruction
1007                         # is *not* yet done.  this is an implementation
1008                         # detail: we choose to implement exceptions by
1009                         # taking the exception information from the LDST
1010                         # unit, putting that *back* into the PowerDecoder2,
1011                         # and *re-running the entire instruction*.
1012                         # if we erroneously indicate "done" here, it is as if
1013                         # there were *TWO* instructions:
1014                         # 1) the failed LDST 2) a TRAP.
1015                         with m.If(~pdecode2.ldst_exc.happened):
1016                             comb += self.insn_done.eq(1)
1017                         m.next = "INSN_START"  # back to fetch
1018
1019     def setup_peripherals(self, m):
1020         comb, sync = m.d.comb, m.d.sync
1021
1022         # okaaaay so the debug module must be in coresync clock domain
1023         # but NOT its reset signal. to cope with this, set every single
1024         # submodule explicitly in coresync domain, debug and JTAG
1025         # in their own one but using *external* reset.
1026         csd = DomainRenamer("coresync")
1027         dbd = DomainRenamer(self.dbg_domain)
1028
1029         m.submodules.core = core = csd(self.core)
1030         # this _so_ needs sorting out.  ICache is added down inside
1031         # LoadStore1 and is already a submodule of LoadStore1
1032         if not isinstance(self.imem, ICache):
1033             m.submodules.imem = imem = csd(self.imem)
1034         m.submodules.dbg = dbg = dbd(self.dbg)
1035         if self.jtag_en:
1036             m.submodules.jtag = jtag = dbd(self.jtag)
1037             # TODO: UART2GDB mux, here, from external pin
1038             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1039             sync += dbg.dmi.connect_to(jtag.dmi)
1040
1041         cur_state = self.cur_state
1042
1043         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1044         if self.sram4x4k:
1045             for i, sram in enumerate(self.sram4k):
1046                 m.submodules["sram4k_%d" % i] = csd(sram)
1047                 comb += sram.enable.eq(self.wb_sram_en)
1048
1049         # XICS interrupt handler
1050         if self.xics:
1051             m.submodules.xics_icp = icp = csd(self.xics_icp)
1052             m.submodules.xics_ics = ics = csd(self.xics_ics)
1053             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1054             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1055
1056         # GPIO test peripheral
1057         if self.gpio:
1058             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1059
1060         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1061         # XXX causes litex ECP5 test to get wrong idea about input and output
1062         # (but works with verilator sim *sigh*)
1063         # if self.gpio and self.xics:
1064         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1065
1066         # instruction decoder
1067         pdecode = create_pdecode()
1068         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1069         if self.svp64_en:
1070             m.submodules.svp64 = svp64 = csd(self.svp64)
1071
1072         # convenience
1073         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1074         intrf = self.core.regs.rf['int']
1075
1076         # clock delay power-on reset
1077         cd_por = ClockDomain(reset_less=True)
1078         cd_sync = ClockDomain()
1079         core_sync = ClockDomain("coresync")
1080         m.domains += cd_por, cd_sync, core_sync
1081         if self.dbg_domain != "sync":
1082             dbg_sync = ClockDomain(self.dbg_domain)
1083             m.domains += dbg_sync
1084
1085         ti_rst = Signal(reset_less=True)
1086         delay = Signal(range(4), reset=3)
1087         with m.If(delay != 0):
1088             m.d.por += delay.eq(delay - 1)
1089         comb += cd_por.clk.eq(ClockSignal())
1090
1091         # power-on reset delay
1092         core_rst = ResetSignal("coresync")
1093         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1094         comb += core_rst.eq(ti_rst)
1095
1096         # debug clock is same as coresync, but reset is *main external*
1097         if self.dbg_domain != "sync":
1098             dbg_rst = ResetSignal(self.dbg_domain)
1099             comb += dbg_rst.eq(ResetSignal())
1100
1101         # busy/halted signals from core
1102         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1103         comb += self.busy_o.eq(core_busy_o)
1104         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1105
1106         # temporary hack: says "go" immediately for both address gen and ST
1107         l0 = core.l0
1108         ldst = core.fus.fus['ldst0']
1109         st_go_edge = rising_edge(m, ldst.st.rel_o)
1110         # link addr-go direct to rel
1111         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1112         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1113
    def elaborate(self, platform):
        """build the issuer: peripherals, handshake signals and the FSMs

        creates the Module, calls setup_peripherals, declares the
        ready/valid handshake Signals, instantiates the FetchFSM and then
        adds the issue, (optional) predicate-fetch and execute FSMs, the
        DMI regfile-read logic and the DEC/TB FSM.  returns the Module.

        platform is not used in this function.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal()  # note write to PC
        sv_changed = Signal()  # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are the defaults: the issue FSM, added further down,
        # overrides them in the cycles where it does write the PC.  they
        # must therefore stay *before* the call to issue_fsm)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        # (not driven in this function: handed to FetchFSM and issue_fsm)
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(pc)
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1244
1245     def do_dmi(self, m, dbg):
1246         """deals with DMI debug requests
1247
1248         currently only provides read requests for the INT regfile, CR and XER
1249         it will later also deal with *writing* to these regfiles.
1250         """
1251         comb = m.d.comb
1252         sync = m.d.sync
1253         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1254         intrf = self.core.regs.rf['int']
1255
1256         with m.If(d_reg.req):  # request for regfile access being made
1257             # TODO: error-check this
1258             # XXX should this be combinatorial?  sync better?
1259             if intrf.unary:
1260                 comb += self.int_r.ren.eq(1 << d_reg.addr)
1261             else:
1262                 comb += self.int_r.addr.eq(d_reg.addr)
1263                 comb += self.int_r.ren.eq(1)
1264         d_reg_delay = Signal()
1265         sync += d_reg_delay.eq(d_reg.req)
1266         with m.If(d_reg_delay):
1267             # data arrives one clock later
1268             comb += d_reg.data.eq(self.int_r.o_data)
1269             comb += d_reg.ack.eq(1)
1270
1271         # sigh same thing for CR debug
1272         with m.If(d_cr.req):  # request for regfile access being made
1273             comb += self.cr_r.ren.eq(0b11111111)  # enable all
1274         d_cr_delay = Signal()
1275         sync += d_cr_delay.eq(d_cr.req)
1276         with m.If(d_cr_delay):
1277             # data arrives one clock later
1278             comb += d_cr.data.eq(self.cr_r.o_data)
1279             comb += d_cr.ack.eq(1)
1280
1281         # aaand XER...
1282         with m.If(d_xer.req):  # request for regfile access being made
1283             comb += self.xer_r.ren.eq(0b111111)  # enable all
1284         d_xer_delay = Signal()
1285         sync += d_xer_delay.eq(d_xer.req)
1286         with m.If(d_xer_delay):
1287             # data arrives one clock later
1288             comb += d_xer.data.eq(self.xer_r.o_data)
1289             comb += d_xer.ack.eq(1)
1290
1291     def tb_dec_fsm(self, m, spr_dec):
1292         """tb_dec_fsm
1293
1294         this is a FSM for updating either dec or tb.  it runs alternately
1295         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1296         value to DEC, however the regfile has "passthrough" on it so this
1297         *should* be ok.
1298
1299         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1300         """
1301
1302         comb, sync = m.d.comb, m.d.sync
1303         fast_rf = self.core.regs.rf['fast']
1304         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
1305         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
1306
1307         with m.FSM() as fsm:
1308
1309             # initiates read of current DEC
1310             with m.State("DEC_READ"):
1311                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1312                 comb += fast_r_dectb.ren.eq(1)
1313                 m.next = "DEC_WRITE"
1314
1315             # waits for DEC read to arrive (1 cycle), updates with new value
1316             with m.State("DEC_WRITE"):
1317                 new_dec = Signal(64)
1318                 # TODO: MSR.LPCR 32-bit decrement mode
1319                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1320                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1321                 comb += fast_w_dectb.wen.eq(1)
1322                 comb += fast_w_dectb.i_data.eq(new_dec)
1323                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
1324                 m.next = "TB_READ"
1325
1326             # initiates read of current TB
1327             with m.State("TB_READ"):
1328                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1329                 comb += fast_r_dectb.ren.eq(1)
1330                 m.next = "TB_WRITE"
1331
1332             # waits for read TB to arrive, initiates write of current TB
1333             with m.State("TB_WRITE"):
1334                 new_tb = Signal(64)
1335                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1336                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1337                 comb += fast_w_dectb.wen.eq(1)
1338                 comb += fast_w_dectb.i_data.eq(new_tb)
1339                 m.next = "DEC_READ"
1340
1341         return m
1342
1343     def __iter__(self):
1344         yield from self.pc_i.ports()
1345         yield self.pc_o
1346         yield self.memerr_o
1347         yield from self.core.ports()
1348         yield from self.imem.ports()
1349         yield self.core_bigendian_i
1350         yield self.busy_o
1351
1352     def ports(self):
1353         return list(self)
1354
1355     def external_ports(self):
1356         ports = self.pc_i.ports()
1357         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1358                   ]
1359
1360         if self.jtag_en:
1361             ports += list(self.jtag.external_ports())
1362         else:
1363             # don't add DMI if JTAG is enabled
1364             ports += list(self.dbg.dmi.ports())
1365
1366         ports += list(self.imem.ibus.fields.values())
1367         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1368
1369         if self.sram4x4k:
1370             for sram in self.sram4k:
1371                 ports += list(sram.bus.fields.values())
1372
1373         if self.xics:
1374             ports += list(self.xics_icp.bus.fields.values())
1375             ports += list(self.xics_ics.bus.fields.values())
1376             ports.append(self.int_level_i)
1377
1378         if self.gpio:
1379             ports += list(self.simple_gpio.bus.fields.values())
1380             ports.append(self.gpio_o)
1381
1382         return ports
1383
1384     def ports(self):
1385         return list(self)
1386
1387
class TestIssuer(Elaboratable):
    """wraps TestIssuerInternal, adding the clock (and optional PLL) wiring

    if pspec has a use_pll attribute and it is set, the PLL is added as a
    submodule, a "pllclk" clock domain is created, and the PLL test/VCO
    outputs and clock-select input are made available as ports.  a pspec
    without a use_pll attribute is treated as "no PLL".
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """adds the issuer (and PLL, if enabled) and drives the clocks

        the "coresync" clock, and the debug clock if the debug domain is
        not 'sync', are both driven from the main clock.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL, clocked from the PLL output
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # pll.clk_24_i (external 24mhz) is not driven from in here:
            # it is listed by external_ports() instead.
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # no ClockDomain is added to m.domains here for coresync (or for
        # the debug domain): only their clock signals are driven
        intclk = ClockSignal("coresync")
        if self.pll_en:
            # XXX BYPASS PLL XXX: ref_clk (the main clock), rather than
            # the PLL output, drives the internal clock
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            comb += ClockSignal(self.ti.dbg_domain).eq(intclk)

        return m

    def ports(self):
        """returns the issuer's ports, the PLL's ports, and the main
        clock and reset
        """
        clk_rst = [ClockSignal(), ResetSignal()]
        return list(self.ti.ports()) + list(self.pll.ports()) + clk_rst

    def external_ports(self):
        """returns the issuer's external ports followed by the main clock
        and reset and, if the PLL is enabled, the PLL-related ports
        """
        ports = self.ti.external_ports()
        ports += [ClockSignal(), ResetSignal()]
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1475
1476
if __name__ == '__main__':
    # one of each of these Function Units
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = {fu_name: 1 for fu_name in fu_names}

    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line driver, with the full port list
    main(dut, ports=dut.ports(), name="test_issuer")

    # when run without any arguments, additionally write out RTLIL
    # using only the external ports
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)