src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                            SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory  # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51 from soc.experiment.icache import ICache
  52
  53 from nmutil.util import rising_edge
  54
  55
  56 def get_insn(f_instr_o, pc):
  57     if f_instr_o.width == 32:
  58         return f_instr_o
  59     else:
  60         # 64-bit: bit 2 of pc decides which word to select
  61         return f_instr_o.word_select(pc[2], 32)
  62
  63 # gets state input or reads from state regfile
  64
  65
  66 def state_get(m, core_rst, state_i, name, regfile, regnum):
  67     comb = m.d.comb
  68     sync = m.d.sync
  69     # read the PC
  70     res = Signal(64, reset_less=True, name=name)
  71     res_ok_delay = Signal(name="%s_ok_delay" % name)
  72     with m.If(~core_rst):
  73         sync += res_ok_delay.eq(~state_i.ok)
  74         with m.If(state_i.ok):
  75             # incoming override (start from pc_i)
  76             comb += res.eq(state_i.data)
  77         with m.Else():
  78             # otherwise read StateRegs regfile for PC...
  79             comb += regfile.ren.eq(1 << regnum)
  80         # ... but on a 1-clock delay
  81         with m.If(res_ok_delay):
  82             comb += res.eq(regfile.o_data)
  83     return res
  84
  85
  86 def get_predint(m, mask, name):
  87     """decode SVP64 predicate integer mask field to reg number and invert
  88     this is identical to the equivalent function in ISACaller except that
  89     it doesn't read the INT directly, it just decodes "what needs to be done"
  90     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  91
  92     * all1s is set to indicate that no mask is to be applied.
  93     * regread indicates the GPR register number to be read
  94     * invert is set to indicate that the register value is to be inverted
  95     * unary indicates that the contents of the register is to be shifted 1<<r3
  96     """
  97     comb = m.d.comb
  98     regread = Signal(5, name=name+"regread")
  99     invert = Signal(name=name+"invert")
 100     unary = Signal(name=name+"unary")
 101     all1s = Signal(name=name+"all1s")
 102     with m.Switch(mask):
 103         with m.Case(SVP64PredInt.ALWAYS.value):
 104             comb += all1s.eq(1)      # use 0b1111 (all ones)
 105         with m.Case(SVP64PredInt.R3_UNARY.value):
 106             comb += regread.eq(3)
 107             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 108         with m.Case(SVP64PredInt.R3.value):
 109             comb += regread.eq(3)
 110         with m.Case(SVP64PredInt.R3_N.value):
 111             comb += regread.eq(3)
 112             comb += invert.eq(1)
 113         with m.Case(SVP64PredInt.R10.value):
 114             comb += regread.eq(10)
 115         with m.Case(SVP64PredInt.R10_N.value):
 116             comb += regread.eq(10)
 117             comb += invert.eq(1)
 118         with m.Case(SVP64PredInt.R30.value):
 119             comb += regread.eq(30)
 120         with m.Case(SVP64PredInt.R30_N.value):
 121             comb += regread.eq(30)
 122             comb += invert.eq(1)
 123     return regread, invert, unary, all1s
 124
 125
 126 def get_predcr(m, mask, name):
 127     """decode SVP64 predicate CR to reg number field and invert status
 128     this is identical to _get_predcr in ISACaller
 129     """
 130     comb = m.d.comb
 131     idx = Signal(2, name=name+"idx")
 132     invert = Signal(name=name+"crinvert")
 133     with m.Switch(mask):
 134         with m.Case(SVP64PredCR.LT.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(0)
 137         with m.Case(SVP64PredCR.GE.value):
 138             comb += idx.eq(CR.LT)
 139             comb += invert.eq(1)
 140         with m.Case(SVP64PredCR.GT.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(0)
 143         with m.Case(SVP64PredCR.LE.value):
 144             comb += idx.eq(CR.GT)
 145             comb += invert.eq(1)
 146         with m.Case(SVP64PredCR.EQ.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(0)
 149         with m.Case(SVP64PredCR.NE.value):
 150             comb += idx.eq(CR.EQ)
 151             comb += invert.eq(1)
 152         with m.Case(SVP64PredCR.SO.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(0)
 155         with m.Case(SVP64PredCR.NS.value):
 156             comb += idx.eq(CR.SO)
 157             comb += invert.eq(1)
 158     return idx, invert
 159
 160
 161 # Fetch Finite State Machine.
 162 # WARNING: there are currently DriverConflicts but it's actually working.
 163 # TODO, here: everything that is global in nature, information from the
 164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 165 # not only that: TestIssuerInternal.imem can entirely move into here
 166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction fetch state machine, wrapped as a ControlBase stage.

    Input data is a FetchInput (its ``pc`` field is used), output data is
    a FetchOutput.  The FSM has four states: IDLE, INSN_READ, INSN_READ2
    (only entered when an SVP64 prefix is detected) and INSN_READY.

    NOTE(review): elaborate() reads ``self.svp64`` and
    ``self.core_bigendian_i`` when svp64_en is set, but neither is
    assigned in __init__ below - presumably the creator of this object
    sets them afterwards; confirm.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """store references to the (externally owned) objects used by the FSM

        * allow_overlap: if set, dbg.stopping_o aborts a read in INSN_READ
        * svp64_en: if set, SVP64 prefixes are detected (python-time flag)
        * imem: fetch unit (a_pc_i, a_i_valid, f_i_valid, f_busy_o,
          f_instr_o are used)
        * core_rst: while asserted, nia is cleared
        * pdecode2: decoder receiving the raw opcode and SVP64 information
        * cur_state: pc, svstate and msr are captured into this
        * dbg: debug unit, only stopping_o is looked at
        * core: gives access to the 'state' regfile and (optional) icache
        * svstate: value copied into cur_state.svstate when a fetch starts
        * nia: receives the address of the following instruction
        * is_svp64_mode: records whether a prefix was detected
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up"""
        pass

    def ispec(self):
        """Stage API: input data specification"""
        return FetchInput()

    def ospec(self):
        """Stage API: output data specification"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        # handshake with whoever supplies the PC (previous stage) ...
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        # ... and with whoever consumes the instruction (next stage)
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # set when the MSR has been captured: cleared in IDLE when a read
        # is requested, set again in INSN_READ once the data is stored
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default: no MSR read (overridden in IDLE when a fetch starts)
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only accept a new PC if not stopping and no failed fetch
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            # (also: captures the MSR, then waits until imem is not busy)
            with m.State("INSN_READ"):
                # "stopping" is only honoured here when overlap is allowed
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and
                            # self.core_bigendian_i are not assigned in
                            # __init__ - confirm they are set externally
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            # NOTE(review): this latches the value that
                            # is_svp64_mode holds *before* the update on the
                            # next-but-one line, not svp64.is_svp64_mode -
                            # confirm that this is intended
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second 32-bit word of a prefixed (SVP64) instruction
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    if self.svp64_en:
                        # (currently unused: see the sketch just below)
                        pmode = pdecode2.rm_dec.predmode
                    """
                    if pmode != SVP64PredMode.ALWAYS.value:
                        fire predicate loading FSM and wait before
                        moving to INSN_READY
                    else:
                        sync += self.srcmask.eq(-1) # set to all 1s
                        sync += self.dstmask.eq(-1) # set to all 1s
                        m.next = "INSN_READY"
                    """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
 353
 354
 355 class TestIssuerInternal(Elaboratable):
 356     """TestIssuer - reads instructions from TestMemory and issues them
 357
 358     efficiency and speed is not the main goal here: functional correctness
 359     and code clarity is.  optimisations (which almost 100% interfere with
 360     easy understanding) come later.
 361     """
 362
 363     def __init__(self, pspec):
 364
 365         # test is SVP64 is to be enabled
 366         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 367
 368         # and if regfiles are reduced
 369         self.regreduce_en = (hasattr(pspec, "regreduce") and
 370                              (pspec.regreduce == True))
 371
 372         # and if overlap requested
 373         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 374                               (pspec.allow_overlap == True))
 375
 376         # JTAG interface.  add this right at the start because if it's
 377         # added it *modifies* the pspec, by adding enable/disable signals
 378         # for parts of the rest of the core
 379         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 380         self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
 381         # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 382         if self.jtag_en:
 383             # XXX MUST keep this up-to-date with litex, and
 384             # soc-cocotb-sim, and err.. all needs sorting out, argh
 385             subset = ['uart',
 386                       'mtwi',
 387                       'eint', 'gpio', 'mspi0',
 388                       # 'mspi1', - disabled for now
 389                       # 'pwm', 'sd0', - disabled for now
 390                       'sdr']
 391             self.jtag = JTAG(get_pinspecs(subset=subset),
 392                              domain=self.dbg_domain)
 393             # add signals to pspec to enable/disable icache and dcache
 394             # (or data and intstruction wishbone if icache/dcache not included)
 395             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 396             # TODO: do we actually care if these are not domain-synchronised?
 397             # honestly probably not.
 398             pspec.wb_icache_en = self.jtag.wb_icache_en
 399             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 400             self.wb_sram_en = self.jtag.wb_sram_en
 401         else:
 402             self.wb_sram_en = Const(1)
 403
 404         # add 4k sram blocks?
 405         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 406                          pspec.sram4x4kblock == True)
 407         if self.sram4x4k:
 408             self.sram4k = []
 409             for i in range(4):
 410                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 411                                                     # features={'err'}
 412                                                     ))
 413
 414         # add interrupt controller?
 415         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 416         if self.xics:
 417             self.xics_icp = XICS_ICP()
 418             self.xics_ics = XICS_ICS()
 419             self.int_level_i = self.xics_ics.int_level_i
 420
 421         # add GPIO peripheral?
 422         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 423         if self.gpio:
 424             self.simple_gpio = SimpleGPIO()
 425             self.gpio_o = self.simple_gpio.gpio_o
 426
 427         # main instruction core.  suitable for prototyping / demo only
 428         self.core = core = NonProductionCore(pspec)
 429         self.core_rst = ResetSignal("coresync")
 430
 431         # instruction decoder.  goes into Trap Record
 432         #pdecode = create_pdecode()
 433         self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
 434         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 435                                      opkls=IssuerDecode2ToOperand,
 436                                      svp64_en=self.svp64_en,
 437                                      regreduce_en=self.regreduce_en)
 438         pdecode = self.pdecode2.dec
 439
 440         if self.svp64_en:
 441             self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix
 442
 443         # Test Instruction memory
 444         if hasattr(core, "icache"):
 445             # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
 446             # truly dreadful.  needs a huge reorg.
 447             pspec.icache = core.icache
 448         self.imem = ConfigFetchUnit(pspec).fu
 449
 450         # DMI interface
 451         self.dbg = CoreDebug()
 452
 453         # instruction go/monitor
 454         self.pc_o = Signal(64, reset_less=True)
 455         self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
 456         self.svstate_i = Data(64, "svstate_i")  # ditto
 457         self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
 458         self.busy_o = Signal(reset_less=True)
 459         self.memerr_o = Signal(reset_less=True)
 460
 461         # STATE regfile read /write ports for PC, MSR, SVSTATE
 462         staterf = self.core.regs.rf['state']
 463         self.state_r_pc = staterf.r_ports['cia']  # PC rd
 464         self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
 465         self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
 466         self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr
 467
 468         # DMI interface access
 469         intrf = self.core.regs.rf['int']
 470         crrf = self.core.regs.rf['cr']
 471         xerrf = self.core.regs.rf['xer']
 472         self.int_r = intrf.r_ports['dmi']  # INT read
 473         self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
 474         self.xer_r = xerrf.r_ports['full_xer']  # XER read
 475
 476         if self.svp64_en:
 477             # for predication
 478             self.int_pred = intrf.r_ports['pred']  # INT predicate read
 479             self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read
 480
 481         # hack method of keeping an eye on whether branch/trap set the PC
 482         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 483         self.state_nia.wen.name = 'state_nia_wen'
 484
 485         # pulse to synchronize the simulator at instruction end
 486         self.insn_done = Signal()
 487
 488         # indicate any instruction still outstanding, in execution
 489         self.any_busy = Signal()
 490
 491         if self.svp64_en:
 492             # store copies of predicate masks
 493             self.srcmask = Signal(64)
 494             self.dstmask = Signal(64)
 495
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred (IntRegs
        r_ports['pred']) or self.cr_pred (CRRegs r_ports['cr_pred']).
        in the case of CRs it is done through multiple reads, extracting
        one relevant bit at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        handshake signals:

        * pred_insn_i_valid: (in) starts the FSM when it is idle
        * pred_insn_o_ready: (out) set while the FSM is idle
        * pred_mask_o_valid: (out) set when srcmask/dstmask are available
        * pred_mask_i_ready: (in) acknowledges the masks: FSM goes idle

        results are stored in self.srcmask and self.dstmask.

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of the same fields are
        # decoded: predmode, below, selects which of them gets used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: no masking at
                        # all, both masks are set to all 1s
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                # (uses the data of the read requested in the previous state)
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the lowest 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                # (uses the data of the read requested in the previous state)
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the lowest 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (cr_read is a python variable: although created
                    # inside this "with", it is still in scope below)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are available: wait here until they are accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 664
    def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states (all statements are added to module "m"):

        * ISSUE_START  - clears pdecode2.instr_fault.  unless stopped or
                         in reset, raises fetch_pc_i_valid and moves on
                         once fetch_pc_o_ready is seen.  when stopped,
                         raises dbg.core_stopped_i and lets pc_i/svstate_i
                         overwrite PC and SVSTATE.
        * INSN_WAIT    - raises fetch_insn_i_ready and waits for
                         fetch_insn_o_valid.  an SVP64 instruction with
                         VL==0 is completed on the spot (PC <- nia,
                         insn_done raised) and returns to ISSUE_START.
        * PRED_START   - raises pred_insn_i_valid, waits for
                         pred_insn_o_ready.
        * MASK_WAIT    - raises pred_mask_i_ready, waits for
                         pred_mask_o_valid.
        * PRED_SKIP    - advances srcstep/dststep over the zero bits of
                         self.srcmask/self.dstmask; ends the VL loop if
                         either step reaches VL.
        * DECODE_SV    - latches the decoder outputs into core.i and clears
                         pdecode2.ldst_exc.
        * INSN_EXECUTE - raises exec_insn_i_valid, waits for
                         exec_insn_o_ready.
        * EXECUTE_WAIT - raises exec_pc_i_ready, waits for exec_pc_o_valid,
                         then decides whether to re-decode (exception),
                         fetch the next instruction, or loop on the next
                         element.

        pc_changed and sv_changed are read in EXECUTE_WAIT and are set here
        only when the debug interface overrides PC / SVSTATE while stopped.
        nia is the value written to the PC when an instruction completes.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # for updating svstate (things like srcstep etc.)
        # new_svstate defaults to the current value: the states below only
        # override the fields that they actually change
        update_svstate = Signal()  # set this (below) if updating
        new_svstate = SVSTATERec("new_svstate")
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        # (python-level check: without an icache this is a constant zero)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)
        # set to fault in decoder
        # update (highest priority) instruction fault
        rising_fetch_failed = rising_edge(m, fetch_failed)
        with m.If(rising_fetch_failed):
            sync += pdecode2.instr_fault.eq(1)

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # reset instruction fault
                sync += pdecode2.instr_fault.eq(0)
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # only honour the debug "stopping" request in this state
                # when overlap is allowed; otherwise it is tied to zero
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # (cur_vl is a python local: it is re-used by the
                        # PRED_SKIP state further down)
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
                with m.If(pred_mask_o_valid):  # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        # (stays at zero when pred_src_zero is set)
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (everything is latched with sync, so core.i holds the
                # values from the clock after this state)
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        with m.If(fetch_failed):
                            # instruction fetch: exception is from MMU
                            # reset instr_fault (highest priority)
                            sync += pdecode2.ldst_exc.eq(mmu)
                            sync += pdecode2.instr_fault.eq(0)
                        with m.Else():
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        # (only srcstep is compared against VL here)
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if either PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either PC or SVSTATE
                        with m.Elif(pc_changed | sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # (with svp64 enabled the reset is conditional
                            # on loop_continue; without it, unconditional)
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): same debug override handling
                    # as in ISSUE_START
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

        # check if svstate needs updating: if so, write it to State Regfile
        with m.If(update_svstate):
            comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(new_svstate)
            sync += cur_state.svstate.eq(new_svstate)  # for next clock
 976
 977     def execute_fsm(self, m, core, pc_changed, sv_changed,
 978                     exec_insn_i_valid, exec_insn_o_ready,
 979                     exec_pc_o_valid, exec_pc_i_ready):
 980         """execute FSM
 981
 982         execute FSM. this interacts with the "issue" FSM
 983         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 984         (outgoing). SVP64 RM prefixes have already been set up by the
 985         "issue" phase, so execute is fairly straightforward.
 986         """
 987
 988         comb = m.d.comb
 989         sync = m.d.sync
 990         pdecode2 = self.pdecode2
 991
 992         # temporaries
 993         core_busy_o = core.n.o_data.busy_o  # core is busy
 994         core_ivalid_i = core.p.i_valid              # instruction is valid
 995
 996         with m.FSM(name="exec_fsm"):
 997
 998             # waiting for instruction bus (stays there until not busy)
 999             with m.State("INSN_START"):
1000                 comb += exec_insn_o_ready.eq(1)
1001                 with m.If(exec_insn_i_valid):
1002                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
1003                     sync += sv_changed.eq(0)
1004                     sync += pc_changed.eq(0)
1005                     with m.If(core.p.o_ready):  # only move if accepted
1006                         m.next = "INSN_ACTIVE"  # move to "wait completion"
1007
1008             # instruction started: must wait till it finishes
1009             with m.State("INSN_ACTIVE"):
1010                 # note changes to PC and SVSTATE
1011                 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1012                     sync += sv_changed.eq(1)
1013                 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1014                     sync += pc_changed.eq(1)
1015                 with m.If(~core_busy_o):  # instruction done!
1016                     comb += exec_pc_o_valid.eq(1)
1017                     with m.If(exec_pc_i_ready):
1018                         # when finished, indicate "done".
1019                         # however, if there was an exception, the instruction
1020                         # is *not* yet done.  this is an implementation
1021                         # detail: we choose to implement exceptions by
1022                         # taking the exception information from the LDST
1023                         # unit, putting that *back* into the PowerDecoder2,
1024                         # and *re-running the entire instruction*.
1025                         # if we erroneously indicate "done" here, it is as if
1026                         # there were *TWO* instructions:
1027                         # 1) the failed LDST 2) a TRAP.
1028                         with m.If(~pdecode2.ldst_exc.happened):
1029                             comb += self.insn_done.eq(1)
1030                         m.next = "INSN_START"  # back to fetch
1031
1032     def setup_peripherals(self, m):
1033         comb, sync = m.d.comb, m.d.sync
1034
1035         # okaaaay so the debug module must be in coresync clock domain
1036         # but NOT its reset signal. to cope with this, set every single
1037         # submodule explicitly in coresync domain, debug and JTAG
1038         # in their own one but using *external* reset.
1039         csd = DomainRenamer("coresync")
1040         dbd = DomainRenamer(self.dbg_domain)
1041
1042         m.submodules.core = core = csd(self.core)
1043         # this _so_ needs sorting out.  ICache is added down inside
1044         # LoadStore1 and is already a submodule of LoadStore1
1045         if not isinstance(self.imem, ICache):
1046             m.submodules.imem = imem = csd(self.imem)
1047         m.submodules.dbg = dbg = dbd(self.dbg)
1048         if self.jtag_en:
1049             m.submodules.jtag = jtag = dbd(self.jtag)
1050             # TODO: UART2GDB mux, here, from external pin
1051             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1052             sync += dbg.dmi.connect_to(jtag.dmi)
1053
1054         cur_state = self.cur_state
1055
1056         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1057         if self.sram4x4k:
1058             for i, sram in enumerate(self.sram4k):
1059                 m.submodules["sram4k_%d" % i] = csd(sram)
1060                 comb += sram.enable.eq(self.wb_sram_en)
1061
1062         # XICS interrupt handler
1063         if self.xics:
1064             m.submodules.xics_icp = icp = csd(self.xics_icp)
1065             m.submodules.xics_ics = ics = csd(self.xics_ics)
1066             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1067             sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core
1068
1069         # GPIO test peripheral
1070         if self.gpio:
1071             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1072
1073         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1074         # XXX causes litex ECP5 test to get wrong idea about input and output
1075         # (but works with verilator sim *sigh*)
1076         # if self.gpio and self.xics:
1077         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1078
1079         # instruction decoder
1080         pdecode = create_pdecode()
1081         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1082         if self.svp64_en:
1083             m.submodules.svp64 = svp64 = csd(self.svp64)
1084
1085         # convenience
1086         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1087         intrf = self.core.regs.rf['int']
1088
1089         # clock delay power-on reset
1090         cd_por = ClockDomain(reset_less=True)
1091         cd_sync = ClockDomain()
1092         core_sync = ClockDomain("coresync")
1093         m.domains += cd_por, cd_sync, core_sync
1094         if self.dbg_domain != "sync":
1095             dbg_sync = ClockDomain(self.dbg_domain)
1096             m.domains += dbg_sync
1097
1098         ti_rst = Signal(reset_less=True)
1099         delay = Signal(range(4), reset=3)
1100         with m.If(delay != 0):
1101             m.d.por += delay.eq(delay - 1)
1102         comb += cd_por.clk.eq(ClockSignal())
1103
1104         # power-on reset delay
1105         core_rst = ResetSignal("coresync")
1106         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1107         comb += core_rst.eq(ti_rst)
1108
1109         # debug clock is same as coresync, but reset is *main external*
1110         if self.dbg_domain != "sync":
1111             dbg_rst = ResetSignal(self.dbg_domain)
1112             comb += dbg_rst.eq(ResetSignal())
1113
1114         # busy/halted signals from core
1115         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
1116         comb += self.busy_o.eq(core_busy_o)
1117         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1118
1119         # temporary hack: says "go" immediately for both address gen and ST
1120         l0 = core.l0
1121         ldst = core.fus.fus['ldst0']
1122         st_go_edge = rising_edge(m, ldst.st.rel_o)
1123         # link addr-go direct to rel
1124         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
1125         m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1126
    def elaborate(self, platform):
        """builds and returns the Module for the issuer

        in order: sets up the core and peripherals, wires the PC/SVSTATE
        reads and the debug state, creates the ready/valid handshake
        signals, instantiates the Fetch FSM and then adds the issue,
        (optional, SVP64-only) predicate-fetch and execute FSMs, the DMI
        regfile read logic and the DEC/TB FSM.

        "platform" is not used in this method.
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal()  # note write to PC
        sv_changed = Signal()  # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                       "pc",                  # read PC
                       self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are defaults: they must come before the FSMs, which
        # override them in the states that do write the PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop # lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(pc)
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # the predicate fetch FSM only exists when SVP64 is enabled
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1257
1258     def do_dmi(self, m, dbg):
1259         """deals with DMI debug requests
1260
1261         currently only provides read requests for the INT regfile, CR and XER
1262         it will later also deal with *writing* to these regfiles.
1263         """
1264         comb = m.d.comb
1265         sync = m.d.sync
1266         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1267         intrf = self.core.regs.rf['int']
1268
1269         with m.If(d_reg.req):  # request for regfile access being made
1270             # TODO: error-check this
1271             # XXX should this be combinatorial?  sync better?
1272             if intrf.unary:
1273                 comb += self.int_r.ren.eq(1 << d_reg.addr)
1274             else:
1275                 comb += self.int_r.addr.eq(d_reg.addr)
1276                 comb += self.int_r.ren.eq(1)
1277         d_reg_delay = Signal()
1278         sync += d_reg_delay.eq(d_reg.req)
1279         with m.If(d_reg_delay):
1280             # data arrives one clock later
1281             comb += d_reg.data.eq(self.int_r.o_data)
1282             comb += d_reg.ack.eq(1)
1283
1284         # sigh same thing for CR debug
1285         with m.If(d_cr.req):  # request for regfile access being made
1286             comb += self.cr_r.ren.eq(0b11111111)  # enable all
1287         d_cr_delay = Signal()
1288         sync += d_cr_delay.eq(d_cr.req)
1289         with m.If(d_cr_delay):
1290             # data arrives one clock later
1291             comb += d_cr.data.eq(self.cr_r.o_data)
1292             comb += d_cr.ack.eq(1)
1293
1294         # aaand XER...
1295         with m.If(d_xer.req):  # request for regfile access being made
1296             comb += self.xer_r.ren.eq(0b111111)  # enable all
1297         d_xer_delay = Signal()
1298         sync += d_xer_delay.eq(d_xer.req)
1299         with m.If(d_xer_delay):
1300             # data arrives one clock later
1301             comb += d_xer.data.eq(self.xer_r.o_data)
1302             comb += d_xer.ack.eq(1)
1303
1304     def tb_dec_fsm(self, m, spr_dec):
1305         """tb_dec_fsm
1306
1307         this is a FSM for updating either dec or tb.  it runs alternately
1308         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1309         value to DEC, however the regfile has "passthrough" on it so this
1310         *should* be ok.
1311
1312         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1313         """
1314
1315         comb, sync = m.d.comb, m.d.sync
1316         fast_rf = self.core.regs.rf['fast']
1317         fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
1318         fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB
1319
1320         with m.FSM() as fsm:
1321
1322             # initiates read of current DEC
1323             with m.State("DEC_READ"):
1324                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1325                 comb += fast_r_dectb.ren.eq(1)
1326                 m.next = "DEC_WRITE"
1327
1328             # waits for DEC read to arrive (1 cycle), updates with new value
1329             with m.State("DEC_WRITE"):
1330                 new_dec = Signal(64)
1331                 # TODO: MSR.LPCR 32-bit decrement mode
1332                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1333                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1334                 comb += fast_w_dectb.wen.eq(1)
1335                 comb += fast_w_dectb.i_data.eq(new_dec)
1336                 sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
1337                 m.next = "TB_READ"
1338
1339             # initiates read of current TB
1340             with m.State("TB_READ"):
1341                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1342                 comb += fast_r_dectb.ren.eq(1)
1343                 m.next = "TB_WRITE"
1344
1345             # waits for read TB to arrive, initiates write of current TB
1346             with m.State("TB_WRITE"):
1347                 new_tb = Signal(64)
1348                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1349                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1350                 comb += fast_w_dectb.wen.eq(1)
1351                 comb += fast_w_dectb.i_data.eq(new_tb)
1352                 m.next = "DEC_READ"
1353
1354         return m
1355
1356     def __iter__(self):
1357         yield from self.pc_i.ports()
1358         yield self.pc_o
1359         yield self.memerr_o
1360         yield from self.core.ports()
1361         yield from self.imem.ports()
1362         yield self.core_bigendian_i
1363         yield self.busy_o
1364
1365     def ports(self):
1366         return list(self)
1367
1368     def external_ports(self):
1369         ports = self.pc_i.ports()
1370         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1371                   ]
1372
1373         if self.jtag_en:
1374             ports += list(self.jtag.external_ports())
1375         else:
1376             # don't add DMI if JTAG is enabled
1377             ports += list(self.dbg.dmi.ports())
1378
1379         ports += list(self.imem.ibus.fields.values())
1380         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1381
1382         if self.sram4x4k:
1383             for sram in self.sram4k:
1384                 ports += list(sram.bus.fields.values())
1385
1386         if self.xics:
1387             ports += list(self.xics_icp.bus.fields.values())
1388             ports += list(self.xics_ics.bus.fields.values())
1389             ports.append(self.int_level_i)
1390
1391         if self.gpio:
1392             ports += list(self.simple_gpio.bus.fields.values())
1393             ports.append(self.gpio_o)
1394
1395         return ports
1396
1397     def ports(self):
1398         return list(self)
1399
1400
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that sets up the clocks

    pspec.use_pll (optional, treated as off if absent) selects whether
    the PLL wrapper is added as a submodule and whether the PLL-related
    signals (clk_sel_i, pll_test_o, pll_vco_o, ref_clk, pllclk_clk) exist.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """adds the issuer (and, if enabled, the PLL) and wires up clocks

        the "coresync" clock is always driven from the default-domain
        clock; if the debug domain is not 'sync' its clock is driven from
        the coresync clock.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established: the "pllclk" domain is clocked
            # from the PLL's output
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # NOTE: the PLL reference input (clk_24_i) is not driven from
            # here: it is listed in external_ports() instead.

            # output the PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal (core) clock.  the "coresync" ClockDomain (and the debug
        # one) is deliberately not declared here: only its clock is driven.
        intclk = ClockSignal("coresync")
        # XXX BYPASS PLL XXX
        # with the PLL enabled the core clock is still taken from ref_clk
        # (the default-domain clock), not from the PLL output.
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """returns the issuer's ports, the PLL's ports (whether or not the
        PLL is enabled), and the default-domain clock and reset
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """returns the issuer's external ports plus the default-domain
        clock and reset and, if the PLL is enabled, the PLL pins
        """
        # copy, so that appending never modifies the issuer's own list
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1488
1489
if __name__ == '__main__':
    # one of each of these Function Units
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr',
                           'div',
                           'mul',
                           'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments given: write out the RTLIL, this time
    # with the external ports only
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)