src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput, FetchInput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                      SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51
  52
  53 from nmutil.util import rising_edge
  54
  55 def get_insn(f_instr_o, pc):
  56     if f_instr_o.width == 32:
  57         return f_instr_o
  58     else:
  59         # 64-bit: bit 2 of pc decides which word to select
  60         return f_instr_o.word_select(pc[2], 32)
  61
  62 # gets state input or reads from state regfile
  63 def state_get(m, core_rst, state_i, name, regfile, regnum):
  64     comb = m.d.comb
  65     sync = m.d.sync
  66     # read the PC
  67     res = Signal(64, reset_less=True, name=name)
  68     res_ok_delay = Signal(name="%s_ok_delay" % name)
  69     with m.If(~core_rst):
  70         sync += res_ok_delay.eq(~state_i.ok)
  71         with m.If(state_i.ok):
  72             # incoming override (start from pc_i)
  73             comb += res.eq(state_i.data)
  74         with m.Else():
  75             # otherwise read StateRegs regfile for PC...
  76             comb += regfile.ren.eq(1<<regnum)
  77         # ... but on a 1-clock delay
  78         with m.If(res_ok_delay):
  79             comb += res.eq(regfile.o_data)
  80     return res
  81
  82
  83 def get_predint(m, mask, name):
  84     """decode SVP64 predicate integer mask field to reg number and invert
  85     this is identical to the equivalent function in ISACaller except that
  86     it doesn't read the INT directly, it just decodes "what needs to be done"
  87     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  88
  89     * all1s is set to indicate that no mask is to be applied.
  90     * regread indicates the GPR register number to be read
  91     * invert is set to indicate that the register value is to be inverted
  92     * unary indicates that the contents of the register is to be shifted 1<<r3
  93     """
  94     comb = m.d.comb
  95     regread = Signal(5, name=name+"regread")
  96     invert = Signal(name=name+"invert")
  97     unary = Signal(name=name+"unary")
  98     all1s = Signal(name=name+"all1s")
  99     with m.Switch(mask):
 100         with m.Case(SVP64PredInt.ALWAYS.value):
 101             comb += all1s.eq(1)      # use 0b1111 (all ones)
 102         with m.Case(SVP64PredInt.R3_UNARY.value):
 103             comb += regread.eq(3)
 104             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 105         with m.Case(SVP64PredInt.R3.value):
 106             comb += regread.eq(3)
 107         with m.Case(SVP64PredInt.R3_N.value):
 108             comb += regread.eq(3)
 109             comb += invert.eq(1)
 110         with m.Case(SVP64PredInt.R10.value):
 111             comb += regread.eq(10)
 112         with m.Case(SVP64PredInt.R10_N.value):
 113             comb += regread.eq(10)
 114             comb += invert.eq(1)
 115         with m.Case(SVP64PredInt.R30.value):
 116             comb += regread.eq(30)
 117         with m.Case(SVP64PredInt.R30_N.value):
 118             comb += regread.eq(30)
 119             comb += invert.eq(1)
 120     return regread, invert, unary, all1s
 121
 122
 123 def get_predcr(m, mask, name):
 124     """decode SVP64 predicate CR to reg number field and invert status
 125     this is identical to _get_predcr in ISACaller
 126     """
 127     comb = m.d.comb
 128     idx = Signal(2, name=name+"idx")
 129     invert = Signal(name=name+"crinvert")
 130     with m.Switch(mask):
 131         with m.Case(SVP64PredCR.LT.value):
 132             comb += idx.eq(CR.LT)
 133             comb += invert.eq(0)
 134         with m.Case(SVP64PredCR.GE.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(1)
 137         with m.Case(SVP64PredCR.GT.value):
 138             comb += idx.eq(CR.GT)
 139             comb += invert.eq(0)
 140         with m.Case(SVP64PredCR.LE.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(1)
 143         with m.Case(SVP64PredCR.EQ.value):
 144             comb += idx.eq(CR.EQ)
 145             comb += invert.eq(0)
 146         with m.Case(SVP64PredCR.NE.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(1)
 149         with m.Case(SVP64PredCR.SO.value):
 150             comb += idx.eq(CR.SO)
 151             comb += invert.eq(0)
 152         with m.Case(SVP64PredCR.NS.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(1)
 155     return idx, invert
 156
 157
 158 # Fetch Finite State Machine.
 159 # WARNING: there are currently DriverConflicts but it's actually working.
 160 # TODO, here: everything that is global in nature, information from the
 161 # main TestIssuerInternal, needs to move to either ispec() or ospec().
 162 # not only that: TestIssuerInternal.imem can entirely move into here
 163 # because imem is only ever accessed inside the FetchFSM.
 164 class FetchFSM(ControlBase):
 165     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 166                        pdecode2, cur_state,
 167                        dbg, core, svstate, nia, is_svp64_mode):
 168         self.allow_overlap = allow_overlap
 169         self.svp64_en = svp64_en
 170         self.imem = imem
 171         self.core_rst = core_rst
 172         self.pdecode2 = pdecode2
 173         self.cur_state = cur_state
 174         self.dbg = dbg
 175         self.core = core
 176         self.svstate = svstate
 177         self.nia = nia
 178         self.is_svp64_mode = is_svp64_mode
 179
 180         # set up pipeline ControlBase and allocate i/o specs
 181         # (unusual: normally done by the Pipeline API)
 182         super().__init__(stage=self)
 183         self.p.i_data, self.n.o_data = self.new_specs(None)
 184         self.i, self.o = self.p.i_data, self.n.o_data
 185
 186     # next 3 functions are Stage API Compliance
 187     def setup(self, m, i):
 188         pass
 189
 190     def ispec(self):
 191         return FetchInput()
 192
 193     def ospec(self):
 194         return FetchOutput()
 195
 196     def elaborate(self, platform):
 197         """fetch FSM
 198
 199         this FSM performs fetch of raw instruction data, partial-decodes
 200         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 201         read a 2nd 32-bit quantity if that occurs.
 202         """
 203         m = super().elaborate(platform)
 204
 205         dbg = self.dbg
 206         core = self.core,
 207         pc = self.i.pc
 208         svstate = self.svstate
 209         nia = self.nia
 210         is_svp64_mode = self.is_svp64_mode
 211         fetch_pc_o_ready = self.p.o_ready
 212         fetch_pc_i_valid = self.p.i_valid
 213         fetch_insn_o_valid = self.n.o_valid
 214         fetch_insn_i_ready = self.n.i_ready
 215
 216         comb = m.d.comb
 217         sync = m.d.sync
 218         pdecode2 = self.pdecode2
 219         cur_state = self.cur_state
 220         dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
 221
 222         msr_read = Signal(reset=1)
 223
 224         # don't read msr every cycle
 225         staterf = self.core.regs.rf['state']
 226         state_r_msr = staterf.r_ports['msr'] # MSR rd
 227
 228         comb += state_r_msr.ren.eq(0)
 229
 230         with m.FSM(name='fetch_fsm'):
 231
 232             # waiting (zzz)
 233             with m.State("IDLE"):
 234                 with m.If(~dbg.stopping_o):
 235                     comb += fetch_pc_o_ready.eq(1)
 236                 with m.If(fetch_pc_i_valid):
 237                     # instruction allowed to go: start by reading the PC
 238                     # capture the PC and also drop it into Insn Memory
 239                     # we have joined a pair of combinatorial memory
 240                     # lookups together.  this is Generally Bad.
 241                     comb += self.imem.a_pc_i.eq(pc)
 242                     comb += self.imem.a_i_valid.eq(1)
 243                     comb += self.imem.f_i_valid.eq(1)
 244                     sync += cur_state.pc.eq(pc)
 245                     sync += cur_state.svstate.eq(svstate) # and svstate
 246
 247                     # initiate read of MSR. arrives one clock later
 248                     comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
 249                     sync += msr_read.eq(0)
 250
 251                     m.next = "INSN_READ"  # move to "wait for bus" phase
 252
 253             # dummy pause to find out why simulation is not keeping up
 254             with m.State("INSN_READ"):
 255                 if self.allow_overlap:
 256                     stopping = dbg.stopping_o
 257                 else:
 258                     stopping = Const(0)
 259                 with m.If(stopping):
 260                     # stopping: jump back to idle
 261                     m.next = "IDLE"
 262                 with m.Else():
 263                     # one cycle later, msr/sv read arrives.  valid only once.
 264                     with m.If(~msr_read):
 265                         sync += msr_read.eq(1) # yeah don't read it again
 266                         sync += cur_state.msr.eq(state_r_msr.o_data)
 267                     with m.If(self.imem.f_busy_o): # zzz...
 268                         # busy: stay in wait-read
 269                         comb += self.imem.a_i_valid.eq(1)
 270                         comb += self.imem.f_i_valid.eq(1)
 271                     with m.Else():
 272                         # not busy: instruction fetched
 273                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 274                         if self.svp64_en:
 275                             svp64 = self.svp64
 276                             # decode the SVP64 prefix, if any
 277                             comb += svp64.raw_opcode_in.eq(insn)
 278                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 279                             # pass the decoded prefix (if any) to PowerDecoder2
 280                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 281                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 282                             # remember whether this is a prefixed instruction,
 283                             # so the FSM can readily loop when VL==0
 284                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 285                             # calculate the address of the following instruction
 286                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 287                             sync += nia.eq(cur_state.pc + insn_size)
 288                             with m.If(~svp64.is_svp64_mode):
 289                                 # with no prefix, store the instruction
 290                                 # and hand it directly to the next FSM
 291                                 sync += dec_opcode_o.eq(insn)
 292                                 m.next = "INSN_READY"
 293                             with m.Else():
 294                                 # fetch the rest of the instruction from memory
 295                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 296                                 comb += self.imem.a_i_valid.eq(1)
 297                                 comb += self.imem.f_i_valid.eq(1)
 298                                 m.next = "INSN_READ2"
 299                         else:
 300                             # not SVP64 - 32-bit only
 301                             sync += nia.eq(cur_state.pc + 4)
 302                             sync += dec_opcode_o.eq(insn)
 303                             m.next = "INSN_READY"
 304
 305             with m.State("INSN_READ2"):
 306                 with m.If(self.imem.f_busy_o):  # zzz...
 307                     # busy: stay in wait-read
 308                     comb += self.imem.a_i_valid.eq(1)
 309                     comb += self.imem.f_i_valid.eq(1)
 310                 with m.Else():
 311                     # not busy: instruction fetched
 312                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 313                     sync += dec_opcode_o.eq(insn)
 314                     m.next = "INSN_READY"
 315                     # TODO: probably can start looking at pdecode2.rm_dec
 316                     # here or maybe even in INSN_READ state, if svp64_mode
 317                     # detected, in order to trigger - and wait for - the
 318                     # predicate reading.
 319                     if self.svp64_en:
 320                         pmode = pdecode2.rm_dec.predmode
 321                     """
 322                     if pmode != SVP64PredMode.ALWAYS.value:
 323                         fire predicate loading FSM and wait before
 324                         moving to INSN_READY
 325                     else:
 326                         sync += self.srcmask.eq(-1) # set to all 1s
 327                         sync += self.dstmask.eq(-1) # set to all 1s
 328                         m.next = "INSN_READY"
 329                     """
 330
 331             with m.State("INSN_READY"):
 332                 # hand over the instruction, to be decoded
 333                 comb += fetch_insn_o_valid.eq(1)
 334                 with m.If(fetch_insn_i_ready):
 335                     m.next = "IDLE"
 336
 337         # whatever was done above, over-ride it if core reset is held
 338         with m.If(self.core_rst):
 339             sync += nia.eq(0)
 340
 341         return m
 342
 343
 344 class TestIssuerInternal(Elaboratable):
 345     """TestIssuer - reads instructions from TestMemory and issues them
 346
 347     efficiency and speed is not the main goal here: functional correctness
 348     and code clarity is.  optimisations (which almost 100% interfere with
 349     easy understanding) come later.
 350     """
 351     def __init__(self, pspec):
 352
 353         # test is SVP64 is to be enabled
 354         self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
 355
 356         # and if regfiles are reduced
 357         self.regreduce_en = (hasattr(pspec, "regreduce") and
 358                                             (pspec.regreduce == True))
 359
 360         # and if overlap requested
 361         self.allow_overlap = (hasattr(pspec, "allow_overlap") and
 362                                             (pspec.allow_overlap == True))
 363
 364         # JTAG interface.  add this right at the start because if it's
 365         # added it *modifies* the pspec, by adding enable/disable signals
 366         # for parts of the rest of the core
 367         self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
 368         self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
 369         #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
 370         if self.jtag_en:
 371             # XXX MUST keep this up-to-date with litex, and
 372             # soc-cocotb-sim, and err.. all needs sorting out, argh
 373             subset = ['uart',
 374                       'mtwi',
 375                       'eint', 'gpio', 'mspi0',
 376                       # 'mspi1', - disabled for now
 377                       # 'pwm', 'sd0', - disabled for now
 378                        'sdr']
 379             self.jtag = JTAG(get_pinspecs(subset=subset),
 380                              domain=self.dbg_domain)
 381             # add signals to pspec to enable/disable icache and dcache
 382             # (or data and intstruction wishbone if icache/dcache not included)
 383             # https://bugs.libre-soc.org/show_bug.cgi?id=520
 384             # TODO: do we actually care if these are not domain-synchronised?
 385             # honestly probably not.
 386             pspec.wb_icache_en = self.jtag.wb_icache_en
 387             pspec.wb_dcache_en = self.jtag.wb_dcache_en
 388             self.wb_sram_en = self.jtag.wb_sram_en
 389         else:
 390             self.wb_sram_en = Const(1)
 391
 392         # add 4k sram blocks?
 393         self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
 394                          pspec.sram4x4kblock == True)
 395         if self.sram4x4k:
 396             self.sram4k = []
 397             for i in range(4):
 398                 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
 399                                                     #features={'err'}
 400                                                     ))
 401
 402         # add interrupt controller?
 403         self.xics = hasattr(pspec, "xics") and pspec.xics == True
 404         if self.xics:
 405             self.xics_icp = XICS_ICP()
 406             self.xics_ics = XICS_ICS()
 407             self.int_level_i = self.xics_ics.int_level_i
 408
 409         # add GPIO peripheral?
 410         self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
 411         if self.gpio:
 412             self.simple_gpio = SimpleGPIO()
 413             self.gpio_o = self.simple_gpio.gpio_o
 414
 415         # main instruction core.  suitable for prototyping / demo only
 416         self.core = core = NonProductionCore(pspec)
 417         self.core_rst = ResetSignal("coresync")
 418
 419         # instruction decoder.  goes into Trap Record
 420         #pdecode = create_pdecode()
 421         self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
 422         self.pdecode2 = PowerDecode2(None, state=self.cur_state,
 423                                      opkls=IssuerDecode2ToOperand,
 424                                      svp64_en=self.svp64_en,
 425                                      regreduce_en=self.regreduce_en)
 426         pdecode = self.pdecode2.dec
 427
 428         if self.svp64_en:
 429             self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
 430
 431         # Test Instruction memory
 432         self.imem = ConfigFetchUnit(pspec).fu
 433
 434         # DMI interface
 435         self.dbg = CoreDebug()
 436
 437         # instruction go/monitor
 438         self.pc_o = Signal(64, reset_less=True)
 439         self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
 440         self.svstate_i = Data(64, "svstate_i") # ditto
 441         self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
 442         self.busy_o = Signal(reset_less=True)
 443         self.memerr_o = Signal(reset_less=True)
 444
 445         # STATE regfile read /write ports for PC, MSR, SVSTATE
 446         staterf = self.core.regs.rf['state']
 447         self.state_r_pc = staterf.r_ports['cia'] # PC rd
 448         self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
 449         self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
 450         self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
 451
 452         # DMI interface access
 453         intrf = self.core.regs.rf['int']
 454         crrf = self.core.regs.rf['cr']
 455         xerrf = self.core.regs.rf['xer']
 456         self.int_r = intrf.r_ports['dmi'] # INT read
 457         self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
 458         self.xer_r = xerrf.r_ports['full_xer'] # XER read
 459
 460         if self.svp64_en:
 461             # for predication
 462             self.int_pred = intrf.r_ports['pred'] # INT predicate read
 463             self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
 464
 465         # hack method of keeping an eye on whether branch/trap set the PC
 466         self.state_nia = self.core.regs.rf['state'].w_ports['nia']
 467         self.state_nia.wen.name = 'state_nia_wen'
 468
 469         # pulse to synchronize the simulator at instruction end
 470         self.insn_done = Signal()
 471
 472         # indicate any instruction still outstanding, in execution
 473         self.any_busy = Signal()
 474
 475         if self.svp64_en:
 476             # store copies of predicate masks
 477             self.srcmask = Signal(64)
 478             self.dstmask = Signal(64)
 479
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred (INT regfile)
        or self.cr_pred (CR regfile).  in the case of CRs it has to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        arguments:

        * m: Module that the FSM is added to
        * pred_insn_i_valid: request: when high in the idle state, a
          predicate fetch is started
        * pred_insn_o_ready: driven high while the FSM is idle
        * pred_mask_o_valid: driven high once srcmask/dstmask are stored
        * pred_mask_i_ready: acknowledge: returns the FSM to idle

        results are left in self.srcmask and self.dstmask.

        states:

        * FETCH_PRED_IDLE: wait for a request, dispatch on predmode
        * INT_DST_READ / INT_SRC_READ: capture INT regfile data
        * CR_READ: build both masks one CR field per cycle
        * FETCH_PRED_SHIFT_MASK: shift out bits below srcstep/dststep
        * FETCH_PRED_DONE: hold "valid" until acknowledged

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        (self.int_pred, self.cr_pred, self.srcmask and self.dstmask only
        exist when svp64_en is set)
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  both the INT and the CR interpretation of
        # srcpred/dstpred are decoded; predmode decides which one is used
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register when
                        # no destination mask applies (all1s)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (both masks start empty and are OR-ed into)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: masks are all ones, stored
                        # directly, by-passing FETCH_PRED_SHIFT_MASK
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register when no source mask
                # applies (all1s)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    # (a python "with" block is not a scope: cr_read is
                    # still visible in the Else and If blocks below)
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are stored: report valid and wait for the ack
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 646
 647     def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
 648                   dbg, core_rst, is_svp64_mode,
 649                   fetch_pc_o_ready, fetch_pc_i_valid,
 650                   fetch_insn_o_valid, fetch_insn_i_ready,
 651                   pred_insn_i_valid, pred_insn_o_ready,
 652                   pred_mask_o_valid, pred_mask_i_ready,
 653                   exec_insn_i_valid, exec_insn_o_ready,
 654                   exec_pc_o_valid, exec_pc_i_ready):
 655         """issue FSM
 656
 657         decode / issue FSM.  this interacts with the "fetch" FSM
 658         through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
 659         (outgoing). also interacts with the "execute" FSM
 660         through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
 661         (incoming).
 662         SVP64 RM prefixes have already been set up by the
 663         "fetch" phase, so execute is fairly straightforward.
 664         """
 665
             # Handshake directions, as used in the states below:
             #   driven here: fetch_pc_i_valid, fetch_insn_i_ready,
             #                pred_insn_i_valid, pred_mask_i_ready,
             #                exec_insn_i_valid, exec_pc_i_ready
             #   read here:   fetch_pc_o_ready, fetch_insn_o_valid,
             #                pred_insn_o_ready, pred_mask_o_valid,
             #                exec_insn_o_ready, exec_pc_o_valid
             # Within this method pc_changed / sv_changed are only ever set
             # (while the core is stopped and pc_i / svstate_i request an
             # override) and are read in EXECUTE_WAIT.
             # State order: ISSUE_START -> INSN_WAIT -> [PRED_START ->
             # MASK_WAIT -> PRED_SKIP, only generated path if svp64_en] ->
             # DECODE_SV -> INSN_EXECUTE -> EXECUTE_WAIT, which then goes to
             # DECODE_SV, ISSUE_START or PRED_SKIP.
 666         comb = m.d.comb
 667         sync = m.d.sync
 668         pdecode2 = self.pdecode2
 669         cur_state = self.cur_state
 670
 671         # temporaries
 672         dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
 673
 674         # for updating svstate (things like srcstep etc.)
 675         update_svstate = Signal() # set this (below) if updating
 676         new_svstate = SVSTATERec("new_svstate")
             # default: new_svstate mirrors the current SVSTATE; the states
             # below assign to it (or to individual fields) when they need to
 677         comb += new_svstate.eq(cur_state.svstate)
 678
 679         # precalculate srcstep+1 and dststep+1
 680         cur_srcstep = cur_state.svstate.srcstep
 681         cur_dststep = cur_state.svstate.dststep
 682         next_srcstep = Signal.like(cur_srcstep)
 683         next_dststep = Signal.like(cur_dststep)
 684         comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
 685         comb += next_dststep.eq(cur_state.svstate.dststep+1)
 686
 687         # note if an exception happened.  in a pipelined or OoO design
 688         # this needs to be accompanied by "shadowing" (or stalling)
 689         exc_happened = self.core.o.exc_happened
 690
 691         with m.FSM(name="issue_fsm"):
 692
 693             # sync with the "fetch" phase which is reading the instruction
 694             # at this point, there is no instruction running, that
 695             # could inadvertently update the PC.
 696             with m.State("ISSUE_START"):
 697                 # wait on "core stop" release, before next fetch
 698                 # need to do this here, in case we are in a VL==0 loop
 699                 with m.If(~dbg.core_stop_o & ~core_rst):
 700                     comb += fetch_pc_i_valid.eq(1) # tell fetch to start
 701                     with m.If(fetch_pc_o_ready):   # fetch acknowledged us
 702                         m.next = "INSN_WAIT"
 703                 with m.Else():
 704                     # tell core it's stopped, and acknowledge debug handshake
 705                     comb += dbg.core_stopped_i.eq(1)
 706                     # while stopped, allow updating the PC and SVSTATE
 707                     with m.If(self.pc_i.ok):
 708                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 709                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 710                         sync += pc_changed.eq(1)
 711                     with m.If(self.svstate_i.ok):
 712                         comb += new_svstate.eq(self.svstate_i.data)
 713                         comb += update_svstate.eq(1)
 714                         sync += sv_changed.eq(1)
 715
 716             # wait for an instruction to arrive from Fetch
 717             with m.State("INSN_WAIT"):
                     # Python-level choice made at elaboration time: the debug
                     # "stopping" request is only honoured when allow_overlap
                     # is set, otherwise the test below is a constant 0
 718                 if self.allow_overlap:
 719                     stopping = dbg.stopping_o
 720                 else:
 721                     stopping = Const(0)
 722                 with m.If(stopping):
 723                     # stopping: jump back to idle
 724                     m.next = "ISSUE_START"
 725                 with m.Else():
 726                     comb += fetch_insn_i_ready.eq(1)
 727                     with m.If(fetch_insn_o_valid):
 728                         # loop into ISSUE_START if it's a SVP64 instruction
 729                         # and VL == 0.  this because VL==0 is a for-loop
 730                         # from 0 to 0 i.e. always, always a NOP.
                             # (cur_vl is an ordinary Python name: the
                             # PRED_SKIP state further down reuses this binding)
 731                         cur_vl = cur_state.svstate.vl
 732                         with m.If(is_svp64_mode & (cur_vl == 0)):
 733                             # update the PC before fetching the next instruction
 734                             # since we are in a VL==0 loop, no instruction was
 735                             # executed that we could be overwriting
 736                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 737                             comb += self.state_w_pc.i_data.eq(nia)
 738                             comb += self.insn_done.eq(1)
 739                             m.next = "ISSUE_START"
 740                         with m.Else():
 741                             if self.svp64_en:
 742                                 m.next = "PRED_START"  # fetching predicate
 743                             else:
 744                                 m.next = "DECODE_SV"  # skip predication
 745
 746             with m.State("PRED_START"):
 747                 comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
 748                 with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
 749                     m.next = "MASK_WAIT"
 750
 751             with m.State("MASK_WAIT"):
 752                 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
 753                 with m.If(pred_mask_o_valid): # predication masks are ready
 754                     m.next = "PRED_SKIP"
 755
 756             # skip zeros in predicate
 757             with m.State("PRED_SKIP"):
 758                 with m.If(~is_svp64_mode):
 759                     m.next = "DECODE_SV"  # nothing to do
 760                 with m.Else():
                         # Python-level test: when svp64_en is false nothing
                         # at all is generated for this Else branch
 761                     if self.svp64_en:
 762                         pred_src_zero = pdecode2.rm_dec.pred_sz
 763                         pred_dst_zero = pdecode2.rm_dec.pred_dz
 764
 765                         # new srcstep, after skipping zeros
 766                         skip_srcstep = Signal.like(cur_srcstep)
 767                         # value to be added to the current srcstep
 768                         src_delta = Signal.like(cur_srcstep)
 769                         # add leading zeros to srcstep, if not in zero mode
 770                         with m.If(~pred_src_zero):
 771                             # priority encoder (count leading zeros)
 772                             # append guard bit, in case the mask is all zeros
                                 # (65 inputs: presumably a 64-bit srcmask plus
                                 # the one guard bit -- TODO confirm mask width)
 773                             pri_enc_src = PriorityEncoder(65)
 774                             m.submodules.pri_enc_src = pri_enc_src
 775                             comb += pri_enc_src.i.eq(Cat(self.srcmask,
 776                                                          Const(1, 1)))
 777                             comb += src_delta.eq(pri_enc_src.o)
 778                         # apply delta to srcstep
                             # (src_delta is only assigned inside the If above;
                             # in zero mode it keeps its default value)
 779                         comb += skip_srcstep.eq(cur_srcstep + src_delta)
 780                         # shift-out all leading zeros from the mask
 781                         # plus the leading "one" bit
 782                         # TODO count leading zeros and shift-out the zero
 783                         #      bits, in the same step, in hardware
 784                         sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
 785
 786                         # same as above, but for dststep
 787                         skip_dststep = Signal.like(cur_dststep)
 788                         dst_delta = Signal.like(cur_dststep)
 789                         with m.If(~pred_dst_zero):
 790                             pri_enc_dst = PriorityEncoder(65)
 791                             m.submodules.pri_enc_dst = pri_enc_dst
 792                             comb += pri_enc_dst.i.eq(Cat(self.dstmask,
 793                                                          Const(1, 1)))
 794                             comb += dst_delta.eq(pri_enc_dst.o)
 795                         comb += skip_dststep.eq(cur_dststep + dst_delta)
 796                         sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
 797
 798                         # TODO: initialize mask[VL]=1 to avoid passing past VL
 799                         with m.If((skip_srcstep >= cur_vl) |
 800                                   (skip_dststep >= cur_vl)):
 801                             # end of VL loop. Update PC and reset src/dst step
 802                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 803                             comb += self.state_w_pc.i_data.eq(nia)
 804                             comb += new_svstate.srcstep.eq(0)
 805                             comb += new_svstate.dststep.eq(0)
 806                             comb += update_svstate.eq(1)
 807                             # synchronize with the simulator
 808                             comb += self.insn_done.eq(1)
 809                             # go back to Issue
 810                             m.next = "ISSUE_START"
 811                         with m.Else():
 812                             # update new src/dst step
 813                             comb += new_svstate.srcstep.eq(skip_srcstep)
 814                             comb += new_svstate.dststep.eq(skip_dststep)
 815                             comb += update_svstate.eq(1)
 816                             # proceed to Decode
 817                             m.next = "DECODE_SV"
 818
 819                         # pass predicate mask bits through to satellite decoders
 820                         # TODO: for SIMD this will be *multiple* bits
                             # NOTE(review): these are sync assignments made in
                             # the same state as the mask shifts above, so bit 0
                             # is sampled from the not-yet-shifted masks --
                             # confirm this is the intended bit
 821                         sync += core.i.sv_pred_sm.eq(self.srcmask[0])
 822                         sync += core.i.sv_pred_dm.eq(self.dstmask[0])
 823
 824             # after src/dst step have been updated, we are ready
 825             # to decode the instruction
 826             with m.State("DECODE_SV"):
 827                 # decode the instruction
                     # (everything here is latched with sync, so the core
                     # inputs hold these values from the next clock onwards)
 828                 sync += core.i.e.eq(pdecode2.e)
 829                 sync += core.i.state.eq(cur_state)
 830                 sync += core.i.raw_insn_i.eq(dec_opcode_i)
 831                 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
 832                 if self.svp64_en:
 833                     sync += core.i.sv_rm.eq(pdecode2.sv_rm)
 834                     # set RA_OR_ZERO detection in satellite decoders
 835                     sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
 836                     # and svp64 detection
 837                     sync += core.i.is_svp64_mode.eq(is_svp64_mode)
 838                     # and svp64 bit-rev'd ldst mode
 839                     ldst_dec = pdecode2.use_svp64_ldst_dec
 840                     sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
 841                 # after decoding, reset any previous exception condition,
 842                 # allowing it to be set again during the next execution
 843                 sync += pdecode2.ldst_exc.eq(0)
 844
 845                 m.next = "INSN_EXECUTE"  # move to "execute"
 846
 847             # handshake with execution FSM, move to "wait" once acknowledged
 848             with m.State("INSN_EXECUTE"):
 849                 comb += exec_insn_i_valid.eq(1) # trigger execute
 850                 with m.If(exec_insn_o_ready):   # execute acknowledged us
 851                     m.next = "EXECUTE_WAIT"
 852
 853             with m.State("EXECUTE_WAIT"):
 854                 # wait on "core stop" release, at instruction end
 855                 # need to do this here, in case we are in a VL>1 loop
 856                 with m.If(~dbg.core_stop_o & ~core_rst):
 857                     comb += exec_pc_i_ready.eq(1)
 858                     # see https://bugs.libre-soc.org/show_bug.cgi?id=636
 859                     # the exception info needs to be blatted into
 860                     # pdecode.ldst_exc, and the instruction "re-run".
 861                     # when ldst_exc.happened is set, the PowerDecoder2
 862                     # reacts very differently: it re-writes the instruction
 863                     # with a "trap" (calls PowerDecoder2.trap()) which
 864                     # will *overwrite* whatever was requested and jump the
 865                     # PC to the exception address, as well as alter MSR.
 866                     # nothing else needs to be done other than to note
 867                     # the change of PC and MSR (and, later, SVSTATE)
 868                     with m.If(exc_happened):
 869                         sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))
 870
 871                     with m.If(exec_pc_o_valid):
 872
 873                         # was this the last loop iteration?
                             # NOTE(review): only srcstep+1 is compared with
                             # VL here; dststep is not examined
 874                         is_last = Signal()
 875                         cur_vl = cur_state.svstate.vl
 876                         comb += is_last.eq(next_srcstep == cur_vl)
 877
 878                         # return directly to Decode if Execute generated an
 879                         # exception.
 880                         with m.If(pdecode2.ldst_exc.happened):
 881                             m.next = "DECODE_SV"
 882
 883                         # if either PC or SVSTATE were changed by the previous
 884                         # instruction, go directly back to Fetch, without
 885                         # updating either PC or SVSTATE
 886                         with m.Elif(pc_changed | sv_changed):
 887                             m.next = "ISSUE_START"
 888
 889                         # also return to Fetch, when no output was a vector
 890                         # (regardless of SRCSTEP and VL), or when the last
 891                         # instruction was really the last one of the VL loop
 892                         with m.Elif((~pdecode2.loop_continue) | is_last):
 893                             # before going back to fetch, update the PC state
 894                             # register with the NIA.
 895                             # ok here we are not reading the branch unit.
 896                             # TODO: this just blithely overwrites whatever
 897                             #       pipeline updated the PC
 898                             comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 899                             comb += self.state_w_pc.i_data.eq(nia)
 900                             # reset SRCSTEP before returning to Fetch
                                 # (Python-level if: with svp64_en the reset is
                                 # additionally gated by loop_continue, without
                                 # it the reset is unconditional)
 901                             if self.svp64_en:
 902                                 with m.If(pdecode2.loop_continue):
 903                                     comb += new_svstate.srcstep.eq(0)
 904                                     comb += new_svstate.dststep.eq(0)
 905                                     comb += update_svstate.eq(1)
 906                             else:
 907                                 comb += new_svstate.srcstep.eq(0)
 908                                 comb += new_svstate.dststep.eq(0)
 909                                 comb += update_svstate.eq(1)
 910                             m.next = "ISSUE_START"
 911
 912                         # returning to Execute? then, first update SRCSTEP
 913                         with m.Else():
 914                             comb += new_svstate.srcstep.eq(next_srcstep)
 915                             comb += new_svstate.dststep.eq(next_dststep)
 916                             comb += update_svstate.eq(1)
 917                             # return to mask skip loop
 918                             m.next = "PRED_SKIP"
 919
 920                 with m.Else():
                         # same "stopped" handling as in ISSUE_START
 921                     comb += dbg.core_stopped_i.eq(1)
 922                     # while stopped, allow updating the PC and SVSTATE
 923                     with m.If(self.pc_i.ok):
 924                         comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
 925                         comb += self.state_w_pc.i_data.eq(self.pc_i.data)
 926                         sync += pc_changed.eq(1)
 927                     with m.If(self.svstate_i.ok):
 928                         comb += new_svstate.eq(self.svstate_i.data)
 929                         comb += update_svstate.eq(1)
 930                         sync += sv_changed.eq(1)
 931
 932         # check if svstate needs updating: if so, write it to State Regfile
             # (this If is outside the FSM, so it applies whichever state
             # raised update_svstate)
 933         with m.If(update_svstate):
 934             comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
 935             comb += self.state_w_sv.i_data.eq(new_svstate)
 936             sync += cur_state.svstate.eq(new_svstate) # for next clock
 937
 938     def execute_fsm(self, m, core, pc_changed, sv_changed,
 939                     exec_insn_i_valid, exec_insn_o_ready,
 940                     exec_pc_o_valid, exec_pc_i_ready):
 941         """execute FSM
 942
 943         execute FSM. this interacts with the "issue" FSM
 944         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 945         (outgoing). SVP64 RM prefixes have already been set up by the
 946         "issue" phase, so execute is fairly straightforward.
 947         """
 948
             # Handshake directions, as used below:
             #   driven here: exec_insn_o_ready, exec_pc_o_valid
             #   read here:   exec_insn_i_valid, exec_pc_i_ready
             # pc_changed / sv_changed are cleared (sync) in INSN_START while
             # exec_insn_i_valid is high, and set in INSN_ACTIVE when
             # state_nia.wen has the PC / SVSTATE bit set.
 949         comb = m.d.comb
 950         sync = m.d.sync
 951         pdecode2 = self.pdecode2
 952
 953         # temporaries
 954         core_busy_o = core.n.o_data.busy_o # core is busy
 955         core_ivalid_i = core.p.i_valid              # instruction is valid
 956
 957         with m.FSM(name="exec_fsm"):
 958
 959             # waiting for instruction bus (stays there until not busy)
 960             with m.State("INSN_START"):
 961                 comb += exec_insn_o_ready.eq(1)
 962                 with m.If(exec_insn_i_valid):
 963                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                         # forget any PC/SVSTATE change noted for the previous
                         # instruction
 964                     sync += sv_changed.eq(0)
 965                     sync += pc_changed.eq(0)
 966                     with m.If(core.p.o_ready): # only move if accepted
 967                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 968
 969             # instruction started: must wait till it finishes
 970             with m.State("INSN_ACTIVE"):
 971                 # note changes to PC and SVSTATE
 972                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 973                     sync += sv_changed.eq(1)
 974                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 975                     sync += pc_changed.eq(1)
 976                 with m.If(~core_busy_o): # instruction done!
 977                     comb += exec_pc_o_valid.eq(1)
 978                     with m.If(exec_pc_i_ready):
 979                         # when finished, indicate "done".
 980                         # however, if there was an exception, the instruction
 981                         # is *not* yet done.  this is an implementation
 982                         # detail: we choose to implement exceptions by
 983                         # taking the exception information from the LDST
 984                         # unit, putting that *back* into the PowerDecoder2,
 985                         # and *re-running the entire instruction*.
 986                         # if we erroneously indicate "done" here, it is as if
 987                         # there were *TWO* instructions:
 988                         # 1) the failed LDST 2) a TRAP.
 989                         with m.If(~pdecode2.ldst_exc.happened):
 990                             comb += self.insn_done.eq(1)
 991                         m.next = "INSN_START"  # back to waiting for Issue
 992
 993     def setup_peripherals(self, m):
 994         comb, sync = m.d.comb, m.d.sync
 995
 996         # okaaaay so the debug module must be in coresync clock domain
 997         # but NOT its reset signal. to cope with this, set every single
 998         # submodule explicitly in coresync domain, debug and JTAG
 999         # in their own one but using *external* reset.
1000         csd = DomainRenamer("coresync")
1001         dbd = DomainRenamer(self.dbg_domain)
1002
1003         m.submodules.core = core = csd(self.core)
1004         m.submodules.imem = imem = csd(self.imem)
1005         m.submodules.dbg = dbg = dbd(self.dbg)
1006         if self.jtag_en:
1007             m.submodules.jtag = jtag = dbd(self.jtag)
1008             # TODO: UART2GDB mux, here, from external pin
1009             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1010             sync += dbg.dmi.connect_to(jtag.dmi)
1011
1012         cur_state = self.cur_state
1013
1014         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1015         if self.sram4x4k:
1016             for i, sram in enumerate(self.sram4k):
1017                 m.submodules["sram4k_%d" % i] = csd(sram)
1018                 comb += sram.enable.eq(self.wb_sram_en)
1019
1020         # XICS interrupt handler
1021         if self.xics:
1022             m.submodules.xics_icp = icp = csd(self.xics_icp)
1023             m.submodules.xics_ics = ics = csd(self.xics_ics)
1024             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1025             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
1026
1027         # GPIO test peripheral
1028         if self.gpio:
1029             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1030
1031         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1032         # XXX causes litex ECP5 test to get wrong idea about input and output
1033         # (but works with verilator sim *sigh*)
1034         #if self.gpio and self.xics:
1035         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1036
1037         # instruction decoder
1038         pdecode = create_pdecode()
1039         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1040         if self.svp64_en:
1041             m.submodules.svp64 = svp64 = csd(self.svp64)
1042
1043         # convenience
1044         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1045         intrf = self.core.regs.rf['int']
1046
1047         # clock delay power-on reset
1048         cd_por  = ClockDomain(reset_less=True)
1049         cd_sync = ClockDomain()
1050         core_sync = ClockDomain("coresync")
1051         m.domains += cd_por, cd_sync, core_sync
1052         if self.dbg_domain != "sync":
1053             dbg_sync = ClockDomain(self.dbg_domain)
1054             m.domains += dbg_sync
1055
1056         ti_rst = Signal(reset_less=True)
1057         delay = Signal(range(4), reset=3)
1058         with m.If(delay != 0):
1059             m.d.por += delay.eq(delay - 1)
1060         comb += cd_por.clk.eq(ClockSignal())
1061
1062         # power-on reset delay
1063         core_rst = ResetSignal("coresync")
1064         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1065         comb += core_rst.eq(ti_rst)
1066
1067         # debug clock is same as coresync, but reset is *main external*
1068         if self.dbg_domain != "sync":
1069             dbg_rst = ResetSignal(self.dbg_domain)
1070             comb += dbg_rst.eq(ResetSignal())
1071
1072         # busy/halted signals from core
1073         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1074         comb += self.busy_o.eq(core_busy_o)
1075         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1076
1077         # temporary hack: says "go" immediately for both address gen and ST
1078         l0 = core.l0
1079         ldst = core.fus.fus['ldst0']
1080         st_go_edge = rising_edge(m, ldst.st.rel_o)
1081         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1082         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1083
1084     def elaborate(self, platform):
             # Builds and returns the top-level Module: sets up peripherals
             # and clock domains, reads PC/SVSTATE, instantiates the Fetch FSM
             # and generates the issue, (optional) predicate-fetch and execute
             # FSMs, the DMI register-read logic and the DEC/TB updater.
             # platform is not referenced in this method.
1085         m = Module()
1086         # convenience
1087         comb, sync = m.d.comb, m.d.sync
1088         cur_state = self.cur_state
1089         pdecode2 = self.pdecode2
1090         dbg = self.dbg
1091         core = self.core
1092
1093         # set up peripherals and core
             # (self.core_rst is assumed to be assigned outside this method,
             # e.g. in __init__ -- not visible here)
1094         core_rst = self.core_rst
1095         self.setup_peripherals(m)
1096
1097         # reset current state if core reset requested
1098         with m.If(core_rst):
1099             m.d.sync += self.cur_state.eq(0)
1100
1101         # PC and instruction from I-Memory
1102         comb += self.pc_o.eq(cur_state.pc)
1103         pc_changed = Signal() # note write to PC
1104         sv_changed = Signal() # note write to SVSTATE
1105
1106         # indicate to outside world if any FU is still executing
1107         comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1108
1109         # read state either from incoming override or from regfile
1110         # TODO: really should be doing MSR in the same way
             # (state_get is a helper that is not defined in this method)
1111         pc = state_get(m, core_rst, self.pc_i,
1112                             "pc",                  # read PC
1113                             self.state_r_pc, StateRegs.PC)
1114         svstate = state_get(m, core_rst, self.svstate_i,
1115                             "svstate",   # read SVSTATE
1116                             self.state_r_sv, StateRegs.SVSTATE)
1117
1118         # don't write pc every cycle
             # (defaults only: the issue FSM assigns wen / i_data in the
             # states where it really does write the PC)
1119         comb += self.state_w_pc.wen.eq(0)
1120         comb += self.state_w_pc.i_data.eq(0)
1121
1122         # address of the next instruction, in the absence of a branch
1123         # depends on the instruction size
             # (not assigned in this method: it is handed to FetchFSM and to
             # issue_fsm below; presumably FetchFSM drives it -- confirm)
1124         nia = Signal(64)
1125
1126         # connect up debug signals
1127         # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1128         comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1129         comb += dbg.state.pc.eq(pc)
1130         comb += dbg.state.svstate.eq(svstate)
1131         comb += dbg.state.msr.eq(cur_state.msr)
1132
1133         # pass the prefix mode from Fetch to Issue, so the latter can loop
1134         # on VL==0
1135         is_svp64_mode = Signal()
1136
1137         # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1138         # issue, decode/execute, now joined by "Predicate fetch/calculate".
1139         # these are the handshake signals between each
1140
1141         # fetch FSM can run as soon as the PC is valid
1142         fetch_pc_i_valid = Signal() # Issue tells Fetch "start next read"
1143         fetch_pc_o_ready = Signal() # Fetch tells Issue "proceed"
1144
1145         # fetch FSM hands over the instruction to be decoded / issued
1146         fetch_insn_o_valid = Signal()
1147         fetch_insn_i_ready = Signal()
1148
1149         # predicate fetch FSM decodes and fetches the predicate
1150         pred_insn_i_valid = Signal()
1151         pred_insn_o_ready = Signal()
1152
1153         # predicate fetch FSM delivers the masks
1154         pred_mask_o_valid = Signal()
1155         pred_mask_i_ready = Signal()
1156
1157         # issue FSM delivers the instruction to be executed
1158         exec_insn_i_valid = Signal()
1159         exec_insn_o_ready = Signal()
1160
1161         # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1162         exec_pc_o_valid = Signal()
1163         exec_pc_i_ready = Signal()
1164
1165         # the FSMs here are perhaps unusual in that they detect conditions
1166         # then "hold" information, combinatorially, for the core
1167         # (as opposed to using sync - which would be on a clock's delay)
1168         # this includes the actual opcode, valid flags and so on.
1169
1170         # Fetch, then predicate fetch, then Issue, then Execute.
1171         # Issue is where the VL for-loop lives.  the ready/valid
1172         # signalling is used to communicate between the four.
1173
1174         # set up Fetch FSM
             # (unlike the other FSMs, Fetch is a separate submodule; its p/n
             # ready/valid ports are bridged onto the plain Signals above)
1175         fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1176                         self.imem, core_rst, pdecode2, cur_state,
1177                        dbg, core, svstate, nia, is_svp64_mode)
1178         m.submodules.fetch = fetch
1179         # connect up in/out data to existing Signals
1180         comb += fetch.p.i_data.pc.eq(pc)
1181         # and the ready/valid signalling
1182         comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1183         comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1184         comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1185         comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1186
1187         self.issue_fsm(m, core, pc_changed, sv_changed, nia,
1188                        dbg, core_rst, is_svp64_mode,
1189                        fetch_pc_o_ready, fetch_pc_i_valid,
1190                        fetch_insn_o_valid, fetch_insn_i_ready,
1191                        pred_insn_i_valid, pred_insn_o_ready,
1192                        pred_mask_o_valid, pred_mask_i_ready,
1193                        exec_insn_i_valid, exec_insn_o_ready,
1194                        exec_pc_o_valid, exec_pc_i_ready)
1195
             # the predicate-fetch FSM is only generated when SVP64 is enabled
1196         if self.svp64_en:
1197             self.fetch_predicate_fsm(m,
1198                                      pred_insn_i_valid, pred_insn_o_ready,
1199                                      pred_mask_o_valid, pred_mask_i_ready)
1200
1201         self.execute_fsm(m, core, pc_changed, sv_changed,
1202                          exec_insn_i_valid, exec_insn_o_ready,
1203                          exec_pc_o_valid, exec_pc_i_ready)
1204
1205         # this bit doesn't have to be in the FSM: connect up to read
1206         # regfiles on demand from DMI
1207         self.do_dmi(m, dbg)
1208
1209         # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
1210         # (which uses that in PowerDecoder2 to raise 0x900 exception)
             # (the value returned by tb_dec_fsm is not used)
1211         self.tb_dec_fsm(m, cur_state.dec)
1212
1213         return m
1214
1215     def do_dmi(self, m, dbg):
1216         """deals with DMI debug requests
1217
1218         currently only provides read requests for the INT regfile, CR and XER
1219         it will later also deal with *writing* to these regfiles.
1220         """
             # m:   Module to which the comb/sync logic is added
             # dbg: debug module providing the d_gpr / d_cr / d_xer request
             #      interfaces (each with req, data and ack; d_gpr also addr)
             # all three follow the same pattern: raise the regfile read
             # enable while req is high, then one clock later present the
             # read data together with ack.
1221         comb = m.d.comb
1222         sync = m.d.sync
             # (dmi is bound here but not used below)
1223         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1224         intrf = self.core.regs.rf['int']
1225
1226         with m.If(d_reg.req): # request for regfile access being made
1227             # TODO: error-check this
1228             # XXX should this be combinatorial?  sync better?
                 # Python-level choice: a unary regfile takes a one-hot read
                 # enable, otherwise an address plus a single enable bit
1229             if intrf.unary:
1230                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1231             else:
1232                 comb += self.int_r.addr.eq(d_reg.addr)
1233                 comb += self.int_r.ren.eq(1)
             # d_reg_delay is req delayed by one clock
1234         d_reg_delay  = Signal()
1235         sync += d_reg_delay.eq(d_reg.req)
1236         with m.If(d_reg_delay):
1237             # data arrives one clock later
1238             comb += d_reg.data.eq(self.int_r.o_data)
1239             comb += d_reg.ack.eq(1)
1240
1241         # sigh same thing for CR debug
1242         with m.If(d_cr.req): # request for regfile access being made
1243             comb += self.cr_r.ren.eq(0b11111111) # enable all
1244         d_cr_delay  = Signal()
1245         sync += d_cr_delay.eq(d_cr.req)
1246         with m.If(d_cr_delay):
1247             # data arrives one clock later
1248             comb += d_cr.data.eq(self.cr_r.o_data)
1249             comb += d_cr.ack.eq(1)
1250
1251         # aaand XER...
1252         with m.If(d_xer.req): # request for regfile access being made
1253             comb += self.xer_r.ren.eq(0b111111) # enable all
1254         d_xer_delay  = Signal()
1255         sync += d_xer_delay.eq(d_xer.req)
1256         with m.If(d_xer_delay):
1257             # data arrives one clock later
1258             comb += d_xer.data.eq(self.xer_r.o_data)
1259             comb += d_xer.ack.eq(1)
1260
1261     def tb_dec_fsm(self, m, spr_dec):
1262         """tb_dec_fsm
1263
1264         this is a FSM for updating either dec or tb.  it runs alternately
1265         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1266         value to DEC, however the regfile has "passthrough" on it so this
1267         *should* be ok.
1268
1269         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1270         """
1271
             # m:       Module to which the FSM is added
             # spr_dec: signal that receives (sync) a copy of each new DEC
             #          value written back to the fast regfile
             # the four states follow each other unconditionally, one clock
             # each, so DEC and TB are each updated once every four clocks.
             # both registers share the single 'issue' read and write ports.
1272         comb, sync = m.d.comb, m.d.sync
1273         fast_rf = self.core.regs.rf['fast']
1274         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1275         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1276
             # (the name fsm is bound but not used below)
1277         with m.FSM() as fsm:
1278
1279             # initiates read of current DEC
1280             with m.State("DEC_READ"):
1281                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1282                 comb += fast_r_dectb.ren.eq(1)
1283                 m.next = "DEC_WRITE"
1284
1285             # waits for DEC read to arrive (1 cycle), updates with new value
1286             with m.State("DEC_WRITE"):
1287                 new_dec = Signal(64)
1288                 # TODO: MSR.LPCR 32-bit decrement mode
1289                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1290                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1291                 comb += fast_w_dectb.wen.eq(1)
1292                 comb += fast_w_dectb.i_data.eq(new_dec)
1293                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1294                 m.next = "TB_READ"
1295
1296             # initiates read of current TB
1297             with m.State("TB_READ"):
1298                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1299                 comb += fast_r_dectb.ren.eq(1)
1300                 m.next = "TB_WRITE"
1301
1302             # waits for read TB to arrive, initiates write of current TB
1303             with m.State("TB_WRITE"):
1304                 new_tb = Signal(64)
1305                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1306                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1307                 comb += fast_w_dectb.wen.eq(1)
1308                 comb += fast_w_dectb.i_data.eq(new_tb)
1309                 m.next = "DEC_READ"
1310
             # returns the same Module that was passed in
1311         return m
1312
1313     def __iter__(self):
1314         yield from self.pc_i.ports()
1315         yield self.pc_o
1316         yield self.memerr_o
1317         yield from self.core.ports()
1318         yield from self.imem.ports()
1319         yield self.core_bigendian_i
1320         yield self.busy_o
1321
1322     def ports(self):
1323         return list(self)
1324
1325     def external_ports(self):
1326         ports = self.pc_i.ports()
1327         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1328                 ]
1329
1330         if self.jtag_en:
1331             ports += list(self.jtag.external_ports())
1332         else:
1333             # don't add DMI if JTAG is enabled
1334             ports += list(self.dbg.dmi.ports())
1335
1336         ports += list(self.imem.ibus.fields.values())
1337         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1338
1339         if self.sram4x4k:
1340             for sram in self.sram4k:
1341                 ports += list(sram.bus.fields.values())
1342
1343         if self.xics:
1344             ports += list(self.xics_icp.bus.fields.values())
1345             ports += list(self.xics_ics.bus.fields.values())
1346             ports.append(self.int_level_i)
1347
1348         if self.gpio:
1349             ports += list(self.simple_gpio.bus.fields.values())
1350             ports.append(self.gpio_o)
1351
1352         return ports
1353
1354     def ports(self):
1355         return list(self)
1356
1357
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that adds a PLL and clock wiring.

    * self.ti is the TestIssuerInternal built from pspec
    * self.pll is a DummyPLL, always constructed but only added as a
      submodule (and only wired up) when pspec.use_pll is present and set

    when the PLL is enabled the following additional attributes exist:
    pll_test_o, pll_vco_o (outputs copied from the PLL), clk_sel_i
    (2-bit input passed to the PLL), ref_clk (the default-domain clock)
    and pllclk_clk (the clock of the "pllclk" domain).
    """
    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk =  ClockSignal() # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """adds TestIssuerInternal (and optionally the PLL) as submodules
        and drives the "coresync" clock, plus the debug-domain clock when
        that domain is not 'sync'.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established: the "pllclk" domain is clocked directly
            # from the PLL output.
            # NOTE(review): the comment here used to refer to a
            # DomainRenamer("pllclk") "above"; no DomainRenamer is applied
            # anywhere inside this class - confirm where (if anywhere) the
            # "pllclk" domain is actually used.
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # the external 24mhz input (pll.clk_24_i) is deliberately NOT
            # driven from ref_clk here: it is exported as a port of its own
            # by external_ports() instead.

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal ("coresync") clock.  no ClockDomain for "coresync" or for
        # the debug domain is created or added to m.domains here: only the
        # clock signals of those domains are driven.
        intclk = ClockSignal("coresync")
        # XXX BYPASS PLL XXX
        # even with the PLL enabled, the core clock is taken straight from
        # ref_clk and not from the PLL output.
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """returns all ports of TestIssuerInternal and of the PLL, followed
        by the default-domain clock and reset.
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """returns the top-level ports: the external ports of
        TestIssuerInternal, the default-domain clock and reset and, when
        the PLL is enabled, clk_sel_i, the PLL's clk_24_i, pll_test_o,
        pll_vco_o, pllclk_clk and ref_clk (in that order).
        """
        # copy, so that the list owned by self.ti is never extended in place
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1445
1446
if __name__ == '__main__':
    # one Function Unit of each of these kinds
    fu_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr',
                'div',
                'mul',
                'shiftrot')
    units = dict.fromkeys(fu_kinds, 1)

    # bare wishbone for both instruction fetch and load/store
    pspec_args = dict(ldst_ifacetype='bare_wb',
                      imem_ifacetype='bare_wb',
                      addr_wid=48,
                      mask_wid=8,
                      reg_wid=64,
                      units=units)
    pspec = TestMemPspec(**pspec_args)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line driver, with the full port list
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run without any command-line arguments: additionally convert to
    # RTLIL, this time with only the external ports, and save it
    no_cmdline_args = len(sys.argv) == 1
    if no_cmdline_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)