src/soc/simple/issuer.py

   1 """simple core issuer
   2
   3 not in any way intended for production use.  this runs a FSM that:
   4
   5 * reads the Program Counter from StateRegs
   6 * reads an instruction from a fixed-size Test Memory
   7 * issues it to the Simple Core
   8 * waits for it to complete
   9 * increments the PC
  10 * does it all over again
  11
  12 the purpose of this module is to verify the functional correctness
  13 of the Function Units in the absolute simplest and clearest possible
  14 way, and to provide something that can be further incrementally
  15 improved.
  16 """
  17
  18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
  19                     ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
  20 from nmigen.cli import rtlil
  21 from nmigen.cli import main
  22 import sys
  23
  24 from nmutil.singlepipe import ControlBase
  25 from soc.simple.core_data import FetchOutput
  26
  27 from nmigen.lib.coding import PriorityEncoder
  28
  29 from openpower.decoder.power_decoder import create_pdecode
  30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
  31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
  32 from openpower.decoder.decode2execute1 import Data
  33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
  34                                      SVP64PredMode)
  35 from openpower.state import CoreState
  36 from openpower.consts import (CR, SVP64CROffs)
  37 from soc.experiment.testmem import TestMemory # test only for instructions
  38 from soc.regfile.regfiles import StateRegs, FastRegs
  39 from soc.simple.core import NonProductionCore
  40 from soc.config.test.test_loadstore import TestMemPspec
  41 from soc.config.ifetch import ConfigFetchUnit
  42 from soc.debug.dmi import CoreDebug, DMIInterface
  43 from soc.debug.jtag import JTAG
  44 from soc.config.pinouts import get_pinspecs
  45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
  46 from soc.bus.simple_gpio import SimpleGPIO
  47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
  48 from soc.clock.select import ClockSelect
  49 from soc.clock.dummypll import DummyPLL
  50 from openpower.sv.svstate import SVSTATERec
  51
  52
  53 from nmutil.util import rising_edge
  54
  55 def get_insn(f_instr_o, pc):
  56     if f_instr_o.width == 32:
  57         return f_instr_o
  58     else:
  59         # 64-bit: bit 2 of pc decides which word to select
  60         return f_instr_o.word_select(pc[2], 32)
  61
  62 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """obtain a 64-bit state value from an override input or a regfile port

    * m: the Module that the logic is added to
    * core_rst: core reset signal; nothing below is driven while it is set
    * state_i: override input.  its "ok" and "data" fields are used
    * name: name given to the result Signal, and the prefix of the
      "<name>_ok_delay" flag
    * regfile: regfile read port.  its "ren" and "o_data" fields are used
    * regnum: index of the register; ren is driven with 1<<regnum

    returns the 64-bit result Signal.
    """
    comb = m.d.comb
    sync = m.d.sync
    # result value, plus a flag recording that a regfile read was
    # requested on the previous clock
    res = Signal(64, reset_less=True, name=name)
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember (for the next clock) that no override was present
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override: use the supplied data directly
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise request a read of the register from the regfile...
            comb += regfile.ren.eq(1<<regnum)
        # ... but on a 1-clock delay.  this assignment comes last, so it
        # takes priority over the override above when both are active
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
  81
  82
  83 def get_predint(m, mask, name):
  84     """decode SVP64 predicate integer mask field to reg number and invert
  85     this is identical to the equivalent function in ISACaller except that
  86     it doesn't read the INT directly, it just decodes "what needs to be done"
  87     i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
  88
  89     * all1s is set to indicate that no mask is to be applied.
  90     * regread indicates the GPR register number to be read
  91     * invert is set to indicate that the register value is to be inverted
  92     * unary indicates that the contents of the register is to be shifted 1<<r3
  93     """
  94     comb = m.d.comb
  95     regread = Signal(5, name=name+"regread")
  96     invert = Signal(name=name+"invert")
  97     unary = Signal(name=name+"unary")
  98     all1s = Signal(name=name+"all1s")
  99     with m.Switch(mask):
 100         with m.Case(SVP64PredInt.ALWAYS.value):
 101             comb += all1s.eq(1)      # use 0b1111 (all ones)
 102         with m.Case(SVP64PredInt.R3_UNARY.value):
 103             comb += regread.eq(3)
 104             comb += unary.eq(1)        # 1<<r3 - shift r3 (single bit)
 105         with m.Case(SVP64PredInt.R3.value):
 106             comb += regread.eq(3)
 107         with m.Case(SVP64PredInt.R3_N.value):
 108             comb += regread.eq(3)
 109             comb += invert.eq(1)
 110         with m.Case(SVP64PredInt.R10.value):
 111             comb += regread.eq(10)
 112         with m.Case(SVP64PredInt.R10_N.value):
 113             comb += regread.eq(10)
 114             comb += invert.eq(1)
 115         with m.Case(SVP64PredInt.R30.value):
 116             comb += regread.eq(30)
 117         with m.Case(SVP64PredInt.R30_N.value):
 118             comb += regread.eq(30)
 119             comb += invert.eq(1)
 120     return regread, invert, unary, all1s
 121
 122
 123 def get_predcr(m, mask, name):
 124     """decode SVP64 predicate CR to reg number field and invert status
 125     this is identical to _get_predcr in ISACaller
 126     """
 127     comb = m.d.comb
 128     idx = Signal(2, name=name+"idx")
 129     invert = Signal(name=name+"crinvert")
 130     with m.Switch(mask):
 131         with m.Case(SVP64PredCR.LT.value):
 132             comb += idx.eq(CR.LT)
 133             comb += invert.eq(0)
 134         with m.Case(SVP64PredCR.GE.value):
 135             comb += idx.eq(CR.LT)
 136             comb += invert.eq(1)
 137         with m.Case(SVP64PredCR.GT.value):
 138             comb += idx.eq(CR.GT)
 139             comb += invert.eq(0)
 140         with m.Case(SVP64PredCR.LE.value):
 141             comb += idx.eq(CR.GT)
 142             comb += invert.eq(1)
 143         with m.Case(SVP64PredCR.EQ.value):
 144             comb += idx.eq(CR.EQ)
 145             comb += invert.eq(0)
 146         with m.Case(SVP64PredCR.NE.value):
 147             comb += idx.eq(CR.EQ)
 148             comb += invert.eq(1)
 149         with m.Case(SVP64PredCR.SO.value):
 150             comb += idx.eq(CR.SO)
 151             comb += invert.eq(0)
 152         with m.Case(SVP64PredCR.NS.value):
 153             comb += idx.eq(CR.SO)
 154             comb += invert.eq(1)
 155     return idx, invert
 156
 157
 158 # Fetch Finite State Machine.
 159 # WARNING: there are currently DriverConflicts but it's actually working.
 160 class FetchFSM(ControlBase):
 161     def __init__(self, allow_overlap, svp64_en, imem, core_rst,
 162                        pdecode2, cur_state,
 163                        dbg, core, pc, svstate, nia, is_svp64_mode):
 164         self.allow_overlap = allow_overlap
 165         self.svp64_en = svp64_en
 166         self.imem = imem
 167         self.core_rst = core_rst
 168         self.pdecode2 = pdecode2
 169         self.cur_state = cur_state
 170         self.dbg = dbg
 171         self.core = core
 172         self.pc = pc
 173         self.svstate = svstate
 174         self.nia = nia
 175         self.is_svp64_mode = is_svp64_mode
 176
 177         # set up pipeline ControlBase and allocate i/o specs
 178         # (unusual: normally done by the Pipeline API)
 179         super().__init__(stage=self)
 180         self.p.i_data, self.n.o_data = self.new_specs(None)
 181         self.i, self.o = self.p.i_data, self.n.o_data
 182
 183         staterf = self.core.regs.rf['state']
 184         self.state_r_msr = staterf.r_ports['msr'] # MSR rd
 185
 186     # next 3 functions are Stage API Compliance
 187     def setup(self, m, i):
 188         pass
 189
 190     def ispec(self):
 191         return Signal(name="dummy_for_now", reset_less=True)
 192
 193     def ospec(self):
 194         return FetchOutput()
 195
 196     def elaborate(self, platform):
 197         """fetch FSM
 198
 199         this FSM performs fetch of raw instruction data, partial-decodes
 200         it 32-bit at a time to detect SVP64 prefixes, and will optionally
 201         read a 2nd 32-bit quantity if that occurs.
 202         """
 203         m = super().elaborate(platform)
 204
 205         dbg = self.dbg
 206         core = self.core,
 207         pc = self.pc
 208         svstate = self.svstate
 209         nia = self.nia
 210         is_svp64_mode = self.is_svp64_mode
 211         fetch_pc_o_ready = self.p.o_ready
 212         fetch_pc_i_valid = self.p.i_valid
 213         fetch_insn_o_valid = self.n.o_valid
 214         fetch_insn_i_ready = self.n.i_ready
 215
 216         comb = m.d.comb
 217         sync = m.d.sync
 218         pdecode2 = self.pdecode2
 219         cur_state = self.cur_state
 220         dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
 221
 222         msr_read = Signal(reset=1)
 223
 224         # don't read msr every cycle
 225         comb += self.state_r_msr.ren.eq(0)
 226
 227         with m.FSM(name='fetch_fsm'):
 228
 229             # waiting (zzz)
 230             with m.State("IDLE"):
 231                 with m.If(~dbg.stopping_o):
 232                     comb += fetch_pc_o_ready.eq(1)
 233                 with m.If(fetch_pc_i_valid):
 234                     # instruction allowed to go: start by reading the PC
 235                     # capture the PC and also drop it into Insn Memory
 236                     # we have joined a pair of combinatorial memory
 237                     # lookups together.  this is Generally Bad.
 238                     comb += self.imem.a_pc_i.eq(pc)
 239                     comb += self.imem.a_i_valid.eq(1)
 240                     comb += self.imem.f_i_valid.eq(1)
 241                     sync += cur_state.pc.eq(pc)
 242                     sync += cur_state.svstate.eq(svstate) # and svstate
 243
 244                     # initiate read of MSR. arrives one clock later
 245                     comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
 246                     sync += msr_read.eq(0)
 247
 248                     m.next = "INSN_READ"  # move to "wait for bus" phase
 249
 250             # dummy pause to find out why simulation is not keeping up
 251             with m.State("INSN_READ"):
 252                 if self.allow_overlap:
 253                     stopping = dbg.stopping_o
 254                 else:
 255                     stopping = Const(0)
 256                 with m.If(stopping):
 257                     # stopping: jump back to idle
 258                     m.next = "IDLE"
 259                 with m.Else():
 260                     # one cycle later, msr/sv read arrives.  valid only once.
 261                     with m.If(~msr_read):
 262                         sync += msr_read.eq(1) # yeah don't read it again
 263                         sync += cur_state.msr.eq(self.state_r_msr.o_data)
 264                     with m.If(self.imem.f_busy_o): # zzz...
 265                         # busy: stay in wait-read
 266                         comb += self.imem.a_i_valid.eq(1)
 267                         comb += self.imem.f_i_valid.eq(1)
 268                     with m.Else():
 269                         # not busy: instruction fetched
 270                         insn = get_insn(self.imem.f_instr_o, cur_state.pc)
 271                         if self.svp64_en:
 272                             svp64 = self.svp64
 273                             # decode the SVP64 prefix, if any
 274                             comb += svp64.raw_opcode_in.eq(insn)
 275                             comb += svp64.bigendian.eq(self.core_bigendian_i)
 276                             # pass the decoded prefix (if any) to PowerDecoder2
 277                             sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
 278                             sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
 279                             # remember whether this is a prefixed instruction,
 280                             # so the FSM can readily loop when VL==0
 281                             sync += is_svp64_mode.eq(svp64.is_svp64_mode)
 282                             # calculate the address of the following instruction
 283                             insn_size = Mux(svp64.is_svp64_mode, 8, 4)
 284                             sync += nia.eq(cur_state.pc + insn_size)
 285                             with m.If(~svp64.is_svp64_mode):
 286                                 # with no prefix, store the instruction
 287                                 # and hand it directly to the next FSM
 288                                 sync += dec_opcode_o.eq(insn)
 289                                 m.next = "INSN_READY"
 290                             with m.Else():
 291                                 # fetch the rest of the instruction from memory
 292                                 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
 293                                 comb += self.imem.a_i_valid.eq(1)
 294                                 comb += self.imem.f_i_valid.eq(1)
 295                                 m.next = "INSN_READ2"
 296                         else:
 297                             # not SVP64 - 32-bit only
 298                             sync += nia.eq(cur_state.pc + 4)
 299                             sync += dec_opcode_o.eq(insn)
 300                             m.next = "INSN_READY"
 301
 302             with m.State("INSN_READ2"):
 303                 with m.If(self.imem.f_busy_o):  # zzz...
 304                     # busy: stay in wait-read
 305                     comb += self.imem.a_i_valid.eq(1)
 306                     comb += self.imem.f_i_valid.eq(1)
 307                 with m.Else():
 308                     # not busy: instruction fetched
 309                     insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
 310                     sync += dec_opcode_o.eq(insn)
 311                     m.next = "INSN_READY"
 312                     # TODO: probably can start looking at pdecode2.rm_dec
 313                     # here or maybe even in INSN_READ state, if svp64_mode
 314                     # detected, in order to trigger - and wait for - the
 315                     # predicate reading.
 316                     if self.svp64_en:
 317                         pmode = pdecode2.rm_dec.predmode
 318                     """
 319                     if pmode != SVP64PredMode.ALWAYS.value:
 320                         fire predicate loading FSM and wait before
 321                         moving to INSN_READY
 322                     else:
 323                         sync += self.srcmask.eq(-1) # set to all 1s
 324                         sync += self.dstmask.eq(-1) # set to all 1s
 325                         m.next = "INSN_READY"
 326                     """
 327
 328             with m.State("INSN_READY"):
 329                 # hand over the instruction, to be decoded
 330                 comb += fetch_insn_o_valid.eq(1)
 331                 with m.If(fetch_insn_i_ready):
 332                     m.next = "IDLE"
 333
 334         # whatever was done above, over-ride it if core reset is held
 335         with m.If(self.core_rst):
 336             sync += nia.eq(0)
 337
 338         return m
 339
 340
 341 class TestIssuerInternal(Elaboratable):
 342     """TestIssuer - reads instructions from TestMemory and issues them
 343
 344     efficiency and speed is not the main goal here: functional correctness
 345     and code clarity is.  optimisations (which almost 100% interfere with
 346     easy understanding) come later.
 347     """
    def __init__(self, pspec):
        """construct the issuer's sub-blocks, regfile port handles and signals

        * pspec: parameter specification object.  the optional attributes
          svp64, regreduce, allow_overlap, sram4x4kblock, xics and gpio
          each enable a feature only when present and equal to True;
          debug enables JTAG only when present and equal to 'jtag'.

        note: when JTAG is enabled, pspec itself is modified (wb_icache_en
        and wb_dcache_en are added to it) before it is handed on to
        NonProductionCore and ConfigFetchUnit.
        """

        # test if SVP64 is to be enabled
        self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

        # and if regfiles are reduced
        self.regreduce_en = (hasattr(pspec, "regreduce") and
                                            (pspec.regreduce == True))

        # and if overlap requested
        self.allow_overlap = (hasattr(pspec, "allow_overlap") and
                                            (pspec.allow_overlap == True))

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        self.dbg_domain = "sync" # sigh "dbgsync" too problematic
        #self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
        if self.jtag_en:
            # XXX MUST keep this up-to-date with litex, and
            # soc-cocotb-sim, and err.. all needs sorting out, argh
            subset = ['uart',
                      'mtwi',
                      'eint', 'gpio', 'mspi0',
                      # 'mspi1', - disabled for now
                      # 'pwm', 'sd0', - disabled for now
                       'sdr']
            self.jtag = JTAG(get_pinspecs(subset=subset),
                             domain=self.dbg_domain)
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en
            self.wb_sram_en = self.jtag.wb_sram_en
        else:
            # no JTAG: the SRAM enable is a constant 1
            self.wb_sram_en = Const(1)

        # add 4k sram blocks?
        self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                         pspec.sram4x4kblock == True)
        if self.sram4x4k:
            # four blocks, named sram4k_0 .. sram4k_3
            self.sram4k = []
            for i in range(4):
                self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                    #features={'err'}
                                                    ))

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            # expose the ICS interrupt-level input on this object
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            # expose the GPIO output on this object
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core.  suitable for prototyping / demo only
        self.core = core = NonProductionCore(pspec)
        self.core_rst = ResetSignal("coresync")

        # instruction decoder.  goes into Trap Record
        #pdecode = create_pdecode()
        self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
        self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand,
                                     svp64_en=self.svp64_en,
                                     regreduce_en=self.regreduce_en)
        pdecode = self.pdecode2.dec

        if self.svp64_en:
            self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
        self.svstate_i = Data(64, "svstate_i") # ditto
        self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR, SVSTATE
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia'] # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
        self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
        self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi'] # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
        self.xer_r = xerrf.r_ports['full_xer'] # XER read

        if self.svp64_en:
            # for predication
            self.int_pred = intrf.r_ports['pred'] # INT predicate read
            self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

        # pulse to synchronize the simulator at instruction end
        self.insn_done = Signal()

        # indicate any instruction still outstanding, in execution
        self.any_busy = Signal()

        if self.svp64_en:
            # store copies of predicate masks
            self.srcmask = Signal(64)
            self.dstmask = Signal(64)
 476
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
           src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates are read here through self.int_pred and
        self.cr_pred (set up in __init__ from IntRegs r_ports['pred']
        and CRRegs r_ports['cr_pred']).  in the case of CRs it has to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        * m: the Module that the FSM is added to
        * pred_insn_i_valid: (read) request to start fetching the masks
        * pred_insn_o_ready: (driven) set while in FETCH_PRED_IDLE
        * pred_mask_o_valid: (driven) set while in FETCH_PRED_DONE, when
          self.srcmask and self.dstmask have been stored
        * pred_mask_i_ready: (read) acknowledges the masks and returns
          the FSM to FETCH_PRED_IDLE

        nothing is returned: results are left in self.srcmask/self.dstmask

        note: this ENTIRE FSM is not to be called when svp64 is disabled
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred   # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates.  both the INT and the CR interpretation of
        # each field are decoded; predmode selects which one is acted on
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when zero
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR predication: both masks are
                        # all ones, and no shifting step is needed
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when zero
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # this check is outside the If/Else above, so the data of
                # the final read is still processed in the exit cycle
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are stored: wait for them to be accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
 643
    def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  pred_insn_i_valid, pred_insn_o_ready,
                  pred_mask_o_valid, pred_mask_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing). also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        states, in the order they are declared below:

        * ISSUE_START:  unless stopped (dbg.core_stop_o) or in reset
                        (core_rst), raise fetch_pc_i_valid and move on
                        once fetch_pc_o_ready is seen.  while stopped,
                        report dbg.core_stopped_i and accept PC / SVSTATE
                        overrides from self.pc_i / self.svstate_i.
        * INSN_WAIT:    raise fetch_insn_i_ready and wait for
                        fetch_insn_o_valid.  an SVP64 instruction with
                        VL==0 writes nia to the PC, pulses insn_done
                        and returns to ISSUE_START.
        * PRED_START /
          MASK_WAIT:    handshake with the predicate-fetch FSM
                        (only entered when self.svp64_en is set).
        * PRED_SKIP:    advance srcstep/dststep past zero predicate bits,
                        ending the VL loop if either reaches VL.
        * DECODE_SV:    latch the decoder outputs into core.i
        * INSN_EXECUTE: raise exec_insn_i_valid until exec_insn_o_ready.
        * EXECUTE_WAIT: wait for exec_pc_o_valid, then decide whether to
                        re-decode (exception), return to ISSUE_START, or
                        go round the VL loop again via PRED_SKIP.

        arguments:

        * m:              Module to which the FSM and statements are added
        * core:           the core; its input record core.i is written here
        * pc_changed,
          sv_changed:     flags read in EXECUTE_WAIT to skip the PC/SVSTATE
                          update; also set here when a debug override of
                          PC / SVSTATE is accepted while stopped
        * nia:            value written to the PC State Regfile entry when
                          an instruction (or VL loop) completes
        * dbg:            debug module (core_stop_o, core_stopped_i and,
                          if self.allow_overlap, stopping_o are used)
        * core_rst:       core reset, treated like "stopped"
        * is_svp64_mode:  set when the fetched instruction is SVP64
        * remaining args: ready/valid handshake signals, named from the
                          point of view of the FSM on the other side
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        # for updating svstate (things like srcstep etc.)
        update_svstate = Signal() # set this (below) if updating
        new_svstate = SVSTATERec("new_svstate")
        # default: new_svstate mirrors the current one.  individual fields
        # (or the whole record) are overridden by later statements below
        comb += new_svstate.eq(cur_state.svstate)

        # precalculate srcstep+1 and dststep+1
        cur_srcstep = cur_state.svstate.srcstep
        cur_dststep = cur_state.svstate.dststep
        next_srcstep = Signal.like(cur_srcstep)
        next_dststep = Signal.like(cur_dststep)
        comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
        comb += next_dststep.eq(cur_state.svstate.dststep+1)

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        # (taken from self.core rather than from the "core" argument)
        exc_happened = self.core.o.exc_happened

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1) # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                # elaboration-time choice: the "stopping" bail-out only
                # exists in the generated logic when allow_overlap is set
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # loop into ISSUE_START if it's a SVP64 instruction
                        # and VL == 0.  this because VL==0 is a for-loop
                        # from 0 to 0 i.e. always, always a NOP.
                        # NOTE: cur_vl is a plain Python alias; it is also
                        # used (without being re-bound) in PRED_SKIP below
                        cur_vl = cur_state.svstate.vl
                        with m.If(is_svp64_mode & (cur_vl == 0)):
                            # update the PC before fetching the next instruction
                            # since we are in a VL==0 loop, no instruction was
                            # executed that we could be overwriting
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += self.insn_done.eq(1)
                            m.next = "ISSUE_START"
                        with m.Else():
                            if self.svp64_en:
                                m.next = "PRED_START"  # fetching predicate
                            else:
                                m.next = "DECODE_SV"  # skip predication

            with m.State("PRED_START"):
                comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
                with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                    m.next = "MASK_WAIT"

            with m.State("MASK_WAIT"):
                comb += pred_mask_i_ready.eq(1) # ready to receive the masks
                with m.If(pred_mask_o_valid): # predication masks are ready
                    m.next = "PRED_SKIP"

            # skip zeros in predicate
            with m.State("PRED_SKIP"):
                with m.If(~is_svp64_mode):
                    m.next = "DECODE_SV"  # nothing to do
                with m.Else():
                    if self.svp64_en:
                        pred_src_zero = pdecode2.rm_dec.pred_sz
                        pred_dst_zero = pdecode2.rm_dec.pred_dz

                        # new srcstep, after skipping zeros
                        skip_srcstep = Signal.like(cur_srcstep)
                        # value to be added to the current srcstep
                        # (stays at zero when pred_src_zero is set, as
                        # nothing else assigns it)
                        src_delta = Signal.like(cur_srcstep)
                        # add leading zeros to srcstep, if not in zero mode
                        with m.If(~pred_src_zero):
                            # priority encoder (count leading zeros)
                            # append guard bit, in case the mask is all zeros
                            # (65 inputs: presumably a 64-bit srcmask plus
                            # the guard bit -- TODO confirm srcmask width)
                            pri_enc_src = PriorityEncoder(65)
                            m.submodules.pri_enc_src = pri_enc_src
                            comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                         Const(1, 1)))
                            comb += src_delta.eq(pri_enc_src.o)
                        # apply delta to srcstep
                        comb += skip_srcstep.eq(cur_srcstep + src_delta)
                        # shift-out all leading zeros from the mask
                        # plus the leading "one" bit
                        # TODO count leading zeros and shift-out the zero
                        #      bits, in the same step, in hardware
                        sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                        # same as above, but for dststep
                        skip_dststep = Signal.like(cur_dststep)
                        dst_delta = Signal.like(cur_dststep)
                        with m.If(~pred_dst_zero):
                            pri_enc_dst = PriorityEncoder(65)
                            m.submodules.pri_enc_dst = pri_enc_dst
                            comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                         Const(1, 1)))
                            comb += dst_delta.eq(pri_enc_dst.o)
                        comb += skip_dststep.eq(cur_dststep + dst_delta)
                        sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                        # TODO: initialize mask[VL]=1 to avoid passing past VL
                        # (cur_vl is the alias bound in INSN_WAIT above)
                        with m.If((skip_srcstep >= cur_vl) |
                                  (skip_dststep >= cur_vl)):
                            # end of VL loop. Update PC and reset src/dst step
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                            # synchronize with the simulator
                            comb += self.insn_done.eq(1)
                            # go back to Issue
                            m.next = "ISSUE_START"
                        with m.Else():
                            # update new src/dst step
                            comb += new_svstate.srcstep.eq(skip_srcstep)
                            comb += new_svstate.dststep.eq(skip_dststep)
                            comb += update_svstate.eq(1)
                            # proceed to Decode
                            m.next = "DECODE_SV"

                        # pass predicate mask bits through to satellite decoders
                        # TODO: for SIMD this will be *multiple* bits
                        sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                        sync += core.i.sv_pred_dm.eq(self.dstmask[0])

            # after src/dst step have been updated, we are ready
            # to decode the instruction
            with m.State("DECODE_SV"):
                # decode the instruction
                # (all of these are registered: the core sees them on the
                # clock after this state, i.e. in INSN_EXECUTE)
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                if self.svp64_en:
                    sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                    # set RA_OR_ZERO detection in satellite decoders
                    sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                    # and svp64 detection
                    sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                    # and svp64 bit-rev'd ldst mode
                    ldst_dec = pdecode2.use_svp64_ldst_dec
                    sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1) # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                    with m.If(exec_pc_o_valid):

                        # was this the last loop iteration?
                        # (only srcstep+1 is compared against VL here)
                        is_last = Signal()
                        cur_vl = cur_state.svstate.vl
                        comb += is_last.eq(next_srcstep == cur_vl)

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if either PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either PC or SVSTATE
                        with m.Elif(pc_changed | sv_changed):
                            m.next = "ISSUE_START"

                        # also return to Fetch, when no output was a vector
                        # (regardless of SRCSTEP and VL), or when the last
                        # instruction was really the last one of the VL loop
                        with m.Elif((~pdecode2.loop_continue) | is_last):
                            # before going back to fetch, update the PC state
                            # register with the NIA.
                            # ok here we are not reading the branch unit.
                            # TODO: this just blithely overwrites whatever
                            #       pipeline updated the PC
                            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                            comb += self.state_w_pc.i_data.eq(nia)
                            # reset SRCSTEP before returning to Fetch
                            # (with SVP64 enabled the reset only happens
                            # when loop_continue is set; without SVP64 it
                            # is unconditional)
                            if self.svp64_en:
                                with m.If(pdecode2.loop_continue):
                                    comb += new_svstate.srcstep.eq(0)
                                    comb += new_svstate.dststep.eq(0)
                                    comb += update_svstate.eq(1)
                            else:
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                            m.next = "ISSUE_START"

                        # returning to Execute? then, first update SRCSTEP
                        with m.Else():
                            comb += new_svstate.srcstep.eq(next_srcstep)
                            comb += new_svstate.dststep.eq(next_dststep)
                            comb += update_svstate.eq(1)
                            # return to mask skip loop
                            m.next = "PRED_SKIP"

                with m.Else():
                    # stopped (or in reset): same override handling as in
                    # the stopped branch of ISSUE_START
                    comb += dbg.core_stopped_i.eq(1)
                    # while stopped, allow updating the PC and SVSTATE
                    with m.If(self.pc_i.ok):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                        sync += pc_changed.eq(1)
                    with m.If(self.svstate_i.ok):
                        comb += new_svstate.eq(self.svstate_i.data)
                        comb += update_svstate.eq(1)
                        sync += sv_changed.eq(1)

        # check if svstate needs updating: if so, write it to State Regfile
        # (and keep the local copy in cur_state in step with the regfile)
        with m.If(update_svstate):
            comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
            comb += self.state_w_sv.i_data.eq(new_svstate)
            sync += cur_state.svstate.eq(new_svstate) # for next clock
 934
 935     def execute_fsm(self, m, core, pc_changed, sv_changed,
 936                     exec_insn_i_valid, exec_insn_o_ready,
 937                     exec_pc_o_valid, exec_pc_i_ready):
 938         """execute FSM
 939
 940         execute FSM. this interacts with the "issue" FSM
 941         through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
 942         (outgoing). SVP64 RM prefixes have already been set up by the
 943         "issue" phase, so execute is fairly straightforward.
 944         """
 945
 946         comb = m.d.comb
 947         sync = m.d.sync
 948         pdecode2 = self.pdecode2
 949
 950         # temporaries
 951         core_busy_o = core.n.o_data.busy_o # core is busy
 952         core_ivalid_i = core.p.i_valid              # instruction is valid
 953
 954         with m.FSM(name="exec_fsm"):
 955
 956             # waiting for instruction bus (stays there until not busy)
 957             with m.State("INSN_START"):
 958                 comb += exec_insn_o_ready.eq(1)
 959                 with m.If(exec_insn_i_valid):
 960                     comb += core_ivalid_i.eq(1)  # instruction is valid/issued
 961                     sync += sv_changed.eq(0)
 962                     sync += pc_changed.eq(0)
 963                     with m.If(core.p.o_ready): # only move if accepted
 964                         m.next = "INSN_ACTIVE"  # move to "wait completion"
 965
 966             # instruction started: must wait till it finishes
 967             with m.State("INSN_ACTIVE"):
 968                 # note changes to PC and SVSTATE
 969                 with m.If(self.state_nia.wen & (1<<StateRegs.SVSTATE)):
 970                     sync += sv_changed.eq(1)
 971                 with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
 972                     sync += pc_changed.eq(1)
 973                 with m.If(~core_busy_o): # instruction done!
 974                     comb += exec_pc_o_valid.eq(1)
 975                     with m.If(exec_pc_i_ready):
 976                         # when finished, indicate "done".
 977                         # however, if there was an exception, the instruction
 978                         # is *not* yet done.  this is an implementation
 979                         # detail: we choose to implement exceptions by
 980                         # taking the exception information from the LDST
 981                         # unit, putting that *back* into the PowerDecoder2,
 982                         # and *re-running the entire instruction*.
 983                         # if we erroneously indicate "done" here, it is as if
 984                         # there were *TWO* instructions:
 985                         # 1) the failed LDST 2) a TRAP.
 986                         with m.If(~pdecode2.ldst_exc.happened):
 987                             comb += self.insn_done.eq(1)
 988                         m.next = "INSN_START"  # back to fetch
 989
 990     def setup_peripherals(self, m):
 991         comb, sync = m.d.comb, m.d.sync
 992
 993         # okaaaay so the debug module must be in coresync clock domain
 994         # but NOT its reset signal. to cope with this, set every single
 995         # submodule explicitly in coresync domain, debug and JTAG
 996         # in their own one but using *external* reset.
 997         csd = DomainRenamer("coresync")
 998         dbd = DomainRenamer(self.dbg_domain)
 999
1000         m.submodules.core = core = csd(self.core)
1001         m.submodules.imem = imem = csd(self.imem)
1002         m.submodules.dbg = dbg = dbd(self.dbg)
1003         if self.jtag_en:
1004             m.submodules.jtag = jtag = dbd(self.jtag)
1005             # TODO: UART2GDB mux, here, from external pin
1006             # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1007             sync += dbg.dmi.connect_to(jtag.dmi)
1008
1009         cur_state = self.cur_state
1010
1011         # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
1012         if self.sram4x4k:
1013             for i, sram in enumerate(self.sram4k):
1014                 m.submodules["sram4k_%d" % i] = csd(sram)
1015                 comb += sram.enable.eq(self.wb_sram_en)
1016
1017         # XICS interrupt handler
1018         if self.xics:
1019             m.submodules.xics_icp = icp = csd(self.xics_icp)
1020             m.submodules.xics_ics = ics = csd(self.xics_ics)
1021             comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
1022             sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
1023
1024         # GPIO test peripheral
1025         if self.gpio:
1026             m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
1027
1028         # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1029         # XXX causes litex ECP5 test to get wrong idea about input and output
1030         # (but works with verilator sim *sigh*)
1031         #if self.gpio and self.xics:
1032         #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1033
1034         # instruction decoder
1035         pdecode = create_pdecode()
1036         m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
1037         if self.svp64_en:
1038             m.submodules.svp64 = svp64 = csd(self.svp64)
1039
1040         # convenience
1041         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1042         intrf = self.core.regs.rf['int']
1043
1044         # clock delay power-on reset
1045         cd_por  = ClockDomain(reset_less=True)
1046         cd_sync = ClockDomain()
1047         core_sync = ClockDomain("coresync")
1048         m.domains += cd_por, cd_sync, core_sync
1049         if self.dbg_domain != "sync":
1050             dbg_sync = ClockDomain(self.dbg_domain)
1051             m.domains += dbg_sync
1052
1053         ti_rst = Signal(reset_less=True)
1054         delay = Signal(range(4), reset=3)
1055         with m.If(delay != 0):
1056             m.d.por += delay.eq(delay - 1)
1057         comb += cd_por.clk.eq(ClockSignal())
1058
1059         # power-on reset delay
1060         core_rst = ResetSignal("coresync")
1061         comb += ti_rst.eq(delay != 0 | dbg.core_rst_o | ResetSignal())
1062         comb += core_rst.eq(ti_rst)
1063
1064         # debug clock is same as coresync, but reset is *main external*
1065         if self.dbg_domain != "sync":
1066             dbg_rst = ResetSignal(self.dbg_domain)
1067             comb += dbg_rst.eq(ResetSignal())
1068
1069         # busy/halted signals from core
1070         core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
1071         comb += self.busy_o.eq(core_busy_o)
1072         comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
1073
1074         # temporary hack: says "go" immediately for both address gen and ST
1075         l0 = core.l0
1076         ldst = core.fus.fus['ldst0']
1077         st_go_edge = rising_edge(m, ldst.st.rel_o)
1078         m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
1079         m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
1080
    def elaborate(self, platform):
        """build the issuer: peripherals, state access and the FSMs

        creates the Module, calls setup_peripherals, wires the PC and
        SVSTATE reads (state_get) and the debug state outputs, then
        instantiates the Fetch FSM (as a submodule) and adds the issue,
        predicate-fetch (only if svp64_en), execute, DMI and DEC/TB
        logic directly to this module.

        platform: not used within this method

        returns the completed Module
        """
        m = Module()
        # convenience
        comb, sync = m.d.comb, m.d.sync
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst
        self.setup_peripherals(m)

        # reset current state if core reset requested
        with m.If(core_rst):
            m.d.sync += self.cur_state.eq(0)

        # PC and instruction from I-Memory
        comb += self.pc_o.eq(cur_state.pc)
        pc_changed = Signal() # note write to PC
        sv_changed = Signal() # note write to SVSTATE

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing

        # read state either from incoming override or from regfile
        # TODO: really should be doing MSR in the same way
        pc = state_get(m, core_rst, self.pc_i,
                            "pc",                  # read PC
                            self.state_r_pc, StateRegs.PC)
        svstate = state_get(m, core_rst, self.svstate_i,
                            "svstate",   # read SVSTATE
                            self.state_r_sv, StateRegs.SVSTATE)

        # don't write pc every cycle
        # (these are defaults, and must stay *before* the call to
        # issue_fsm: the issue FSM assigns the same signals, later, in
        # the states where it does write the PC)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.i_data.eq(0)

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.svstate.eq(svstate)
        comb += dbg.state.msr.eq(cur_state.msr)

        # pass the prefix mode from Fetch to Issue, so the latter can loop
        # on VL==0
        is_svp64_mode = Signal()

        # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
        # issue, decode/execute, now joined by "Predicate fetch/calculate".
        # these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # predicate fetch FSM decodes and fetches the predicate
        pred_insn_i_valid = Signal()
        pred_insn_o_ready = Signal()

        # predicate fetch FSM delivers the masks
        pred_mask_o_valid = Signal()
        pred_mask_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then predicate fetch, then Issue, then Execute.
        # Issue is where the VL for-loop # lives.  the ready/valid
        # signalling is used to communicate between the four.

        # set up Fetch FSM
        # (a separate submodule: its p/n ready/valid ports are bridged
        # onto the local handshake signals declared above)
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                        self.imem, core_rst, pdecode2, cur_state,
                       dbg, core, pc, svstate, nia, is_svp64_mode)
        m.submodules.fetch = fetch
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        # the remaining FSMs add their logic directly into this module
        self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       pred_insn_i_valid, pred_insn_o_ready,
                       pred_mask_o_valid, pred_mask_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        # without SVP64 the issue FSM never enters its predicate states,
        # so the predicate-fetch FSM is not needed
        if self.svp64_en:
            self.fetch_predicate_fsm(m,
                                     pred_insn_i_valid, pred_insn_o_ready,
                                     pred_mask_o_valid, pred_mask_i_ready)

        self.execute_fsm(m, core, pc_changed, sv_changed,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        self.do_dmi(m, dbg)

        # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
        # (which uses that in PowerDecoder2 to raise 0x900 exception)
        self.tb_dec_fsm(m, cur_state.dec)

        return m
1208
1209     def do_dmi(self, m, dbg):
1210         """deals with DMI debug requests
1211
1212         currently only provides read requests for the INT regfile, CR and XER
1213         it will later also deal with *writing* to these regfiles.
1214         """
1215         comb = m.d.comb
1216         sync = m.d.sync
1217         dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
1218         intrf = self.core.regs.rf['int']
1219
1220         with m.If(d_reg.req): # request for regfile access being made
1221             # TODO: error-check this
1222             # XXX should this be combinatorial?  sync better?
1223             if intrf.unary:
1224                 comb += self.int_r.ren.eq(1<<d_reg.addr)
1225             else:
1226                 comb += self.int_r.addr.eq(d_reg.addr)
1227                 comb += self.int_r.ren.eq(1)
1228         d_reg_delay  = Signal()
1229         sync += d_reg_delay.eq(d_reg.req)
1230         with m.If(d_reg_delay):
1231             # data arrives one clock later
1232             comb += d_reg.data.eq(self.int_r.o_data)
1233             comb += d_reg.ack.eq(1)
1234
1235         # sigh same thing for CR debug
1236         with m.If(d_cr.req): # request for regfile access being made
1237             comb += self.cr_r.ren.eq(0b11111111) # enable all
1238         d_cr_delay  = Signal()
1239         sync += d_cr_delay.eq(d_cr.req)
1240         with m.If(d_cr_delay):
1241             # data arrives one clock later
1242             comb += d_cr.data.eq(self.cr_r.o_data)
1243             comb += d_cr.ack.eq(1)
1244
1245         # aaand XER...
1246         with m.If(d_xer.req): # request for regfile access being made
1247             comb += self.xer_r.ren.eq(0b111111) # enable all
1248         d_xer_delay  = Signal()
1249         sync += d_xer_delay.eq(d_xer.req)
1250         with m.If(d_xer_delay):
1251             # data arrives one clock later
1252             comb += d_xer.data.eq(self.xer_r.o_data)
1253             comb += d_xer.ack.eq(1)
1254
1255     def tb_dec_fsm(self, m, spr_dec):
1256         """tb_dec_fsm
1257
1258         this is a FSM for updating either dec or tb.  it runs alternately
1259         DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
1260         value to DEC, however the regfile has "passthrough" on it so this
1261         *should* be ok.
1262
1263         see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
1264         """
1265
1266         comb, sync = m.d.comb, m.d.sync
1267         fast_rf = self.core.regs.rf['fast']
1268         fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
1269         fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
1270
1271         with m.FSM() as fsm:
1272
1273             # initiates read of current DEC
1274             with m.State("DEC_READ"):
1275                 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
1276                 comb += fast_r_dectb.ren.eq(1)
1277                 m.next = "DEC_WRITE"
1278
1279             # waits for DEC read to arrive (1 cycle), updates with new value
1280             with m.State("DEC_WRITE"):
1281                 new_dec = Signal(64)
1282                 # TODO: MSR.LPCR 32-bit decrement mode
1283                 comb += new_dec.eq(fast_r_dectb.o_data - 1)
1284                 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
1285                 comb += fast_w_dectb.wen.eq(1)
1286                 comb += fast_w_dectb.i_data.eq(new_dec)
1287                 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
1288                 m.next = "TB_READ"
1289
1290             # initiates read of current TB
1291             with m.State("TB_READ"):
1292                 comb += fast_r_dectb.addr.eq(FastRegs.TB)
1293                 comb += fast_r_dectb.ren.eq(1)
1294                 m.next = "TB_WRITE"
1295
1296             # waits for read TB to arrive, initiates write of current TB
1297             with m.State("TB_WRITE"):
1298                 new_tb = Signal(64)
1299                 comb += new_tb.eq(fast_r_dectb.o_data + 1)
1300                 comb += fast_w_dectb.addr.eq(FastRegs.TB)
1301                 comb += fast_w_dectb.wen.eq(1)
1302                 comb += fast_w_dectb.i_data.eq(new_tb)
1303                 m.next = "DEC_READ"
1304
1305         return m
1306
1307     def __iter__(self):
1308         yield from self.pc_i.ports()
1309         yield self.pc_o
1310         yield self.memerr_o
1311         yield from self.core.ports()
1312         yield from self.imem.ports()
1313         yield self.core_bigendian_i
1314         yield self.busy_o
1315
1316     def ports(self):
1317         return list(self)
1318
1319     def external_ports(self):
1320         ports = self.pc_i.ports()
1321         ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
1322                 ]
1323
1324         if self.jtag_en:
1325             ports += list(self.jtag.external_ports())
1326         else:
1327             # don't add DMI if JTAG is enabled
1328             ports += list(self.dbg.dmi.ports())
1329
1330         ports += list(self.imem.ibus.fields.values())
1331         ports += list(self.core.l0.cmpi.wb_bus().fields.values())
1332
1333         if self.sram4x4k:
1334             for sram in self.sram4k:
1335                 ports += list(sram.bus.fields.values())
1336
1337         if self.xics:
1338             ports += list(self.xics_icp.bus.fields.values())
1339             ports += list(self.xics_ics.bus.fields.values())
1340             ports.append(self.int_level_i)
1341
1342         if self.gpio:
1343             ports += list(self.simple_gpio.bus.fields.values())
1344             ports.append(self.gpio_o)
1345
1346         return ports
1347
1348     def ports(self):
1349         return list(self)
1350
1351
class TestIssuer(Elaboratable):
    """Wrapper around TestIssuerInternal that adds the clock wiring.

    pspec is passed straight through to TestIssuerInternal.  When pspec
    has a truthy ``use_pll`` attribute, the PLL wrapper is added as a
    submodule, a "pllclk" clock domain is created, and the PLL test/VCO
    outputs and clock-select input are exposed as extra signals.

    Attributes:
        ti: the TestIssuerInternal instance
        pll: the DummyPLL instance (always created; only added as a
            submodule when pll_en is set)
        pll_en: value of pspec.use_pll, or False if pspec has no such
            attribute
        pll_test_o, pll_vco_o, clk_sel_i, ref_clk, pllclk_clk: only
            present when pll_en is set
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build the module: ti submodule, optional PLL, clock hookup."""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            pll = self.pll
            m.submodules.wrappll = pll

            # add the clock domain that is fed from the PLL
            m.domains += ClockDomain("pllclk")

            # the pllclk domain's clock is the PLL clock output
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # NOTE: pll.clk_24_i is not driven from ref_clk here; it is
            # listed in external_ports() instead.

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # the pllclk domain shares the default domain's reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # the coresync clock is driven from here.  no ClockDomain object
        # for "coresync" (or for the debug domain) is added by this module.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        # ref_clk is itself ClockSignal(), so at present both branches
        # feed coresync from the default clock and the PLL output is
        # not used as the core clock.
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            comb += ClockSignal(self.ti.dbg_domain).eq(intclk)

        return m

    def ports(self):
        """Return ti's ports, the PLL's ports, and the clock and reset."""
        return (list(self.ti.ports()) + list(self.pll.ports()) +
                [ClockSignal(), ResetSignal()])

    def external_ports(self):
        """Return a new list of top-level ports.

        ti.external_ports() comes first, then the default clock and
        reset; when pll_en is set, the PLL-related signals follow.
        """
        # copy so that the list returned by ti is never extended in place
        ports = list(self.ti.external_ports())
        ports += [ClockSignal(), ResetSignal()]
        if self.pll_en:
            ports += [self.clk_sel_i,
                      self.pll.clk_24_i,
                      self.pll_test_o,
                      self.pll_vco_o,
                      self.pllclk_clk,
                      self.ref_clk]
        return ports
1439
1440
if __name__ == '__main__':
    # one of each of these function units
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)

    # bare wishbone for both load/store and instruction fetch
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front end first
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments at all, also write out the RTLIL
    # using the external port list
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut,
                           ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)